[llvm] [Test] Add and update tests for `lrint`/`llrint` (NFC) (PR #152662)

Trevor Gross via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 9 21:33:22 PDT 2025


https://github.com/tgross35 updated https://github.com/llvm/llvm-project/pull/152662

>From c0e510ce1f6a398c7f39f7b2e8b55cb54a7059aa Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 8 Aug 2025 03:15:39 -0500
Subject: [PATCH 1/7] [Test] Add and update tests for lrint

A number of backends are missing either all tests for lrint, or
specifically those for f16 which currently crash for `softPromoteHalf`
targets. For a number of popular backends, do the following:

* Ensure f16, f32, f64, and f128 are all covered
* Ensure both a 32- and 64-bit target are tested, if relevant
* Add `nounwind` to clean up CFI output
* Add a test covering the above if one did not exist
---
 llvm/test/CodeGen/ARM/llrint-conv.ll        |  21 ++++
 llvm/test/CodeGen/ARM/lrint-conv.ll         |  18 +++
 llvm/test/CodeGen/AVR/llrint.ll             |  18 +++
 llvm/test/CodeGen/AVR/lrint.ll              |  18 +++
 llvm/test/CodeGen/LoongArch/lrint-conv.ll   |  96 +++++++++++++++
 llvm/test/CodeGen/MSP430/lrint-conv.ll      |  60 +++++++++
 llvm/test/CodeGen/Mips/llrint-conv.ll       |  15 +++
 llvm/test/CodeGen/Mips/lrint-conv.ll        |  15 +++
 llvm/test/CodeGen/PowerPC/llrint-conv.ll    |  32 +++++
 llvm/test/CodeGen/PowerPC/lrint-conv.ll     |  32 +++++
 llvm/test/CodeGen/RISCV/lrint-conv.ll       |  76 ++++++++++++
 llvm/test/CodeGen/SPARC/lrint-conv.ll       |  68 +++++++++++
 llvm/test/CodeGen/WebAssembly/lrint-conv.ll |  62 ++++++++++
 llvm/test/CodeGen/X86/llrint-conv.ll        | 128 +++++++++++++++-----
 llvm/test/CodeGen/X86/lrint-conv-i32.ll     |  74 +++++++++--
 llvm/test/CodeGen/X86/lrint-conv-i64.ll     |  34 +++++-
 16 files changed, 723 insertions(+), 44 deletions(-)
 create mode 100644 llvm/test/CodeGen/LoongArch/lrint-conv.ll
 create mode 100644 llvm/test/CodeGen/MSP430/lrint-conv.ll
 create mode 100644 llvm/test/CodeGen/RISCV/lrint-conv.ll
 create mode 100644 llvm/test/CodeGen/SPARC/lrint-conv.ll
 create mode 100644 llvm/test/CodeGen/WebAssembly/lrint-conv.ll

diff --git a/llvm/test/CodeGen/ARM/llrint-conv.ll b/llvm/test/CodeGen/ARM/llrint-conv.ll
index 017955bb43afb..f0fb2e7543be6 100644
--- a/llvm/test/CodeGen/ARM/llrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/llrint-conv.ll
@@ -1,6 +1,16 @@
 ; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
 ; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
 
+; SOFTFP-LABEL: testmsxh_builtin:
+; SOFTFP:       bl      llrintf
+; HARDFP-LABEL: testmsxh_builtin:
+; HARDFP:       bl      llrintf
+define i64 @testmsxh_builtin(half %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f16(half %x)
+  ret i64 %0
+}
+
 ; SOFTFP-LABEL: testmsxs_builtin:
 ; SOFTFP:       bl      llrintf
 ; HARDFP-LABEL: testmsxs_builtin:
@@ -21,5 +31,16 @@ entry:
   ret i64 %0
 }
 
+; FIXME(#44744): incorrect libcall
+; SOFTFP-LABEL: testmsxq_builtin:
+; SOFTFP:       bl      llrintl
+; HARDFP-LABEL: testmsxq_builtin:
+; HARDFP:       bl      llrintl
+define i64 @testmsxq_builtin(fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+  ret i64 %0
+}
+
 declare i64 @llvm.llrint.f32(float) nounwind readnone
 declare i64 @llvm.llrint.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
index 192da565c12fd..9aa95112af533 100644
--- a/llvm/test/CodeGen/ARM/lrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -1,6 +1,13 @@
 ; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
 ; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
 
+; FIXME: crash
+; define i32 @testmswh_builtin(half %x) {
+; entry:
+;   %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+;   ret i32 %0
+; }
+
 ; SOFTFP-LABEL: testmsws_builtin:
 ; SOFTFP:       bl      lrintf
 ; HARDFP-LABEL: testmsws_builtin:
@@ -21,5 +28,16 @@ entry:
   ret i32 %0
 }
 
+; FIXME(#44744): incorrect libcall
+; SOFTFP-LABEL: testmswq_builtin:
+; SOFTFP:       bl      lrintl
+; HARDFP-LABEL: testmswq_builtin:
+; HARDFP:       bl      lrintl
+define i32 @testmswq_builtin(fp128 %x) {
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x)
+  ret i32 %0
+}
+
 declare i32 @llvm.lrint.i32.f32(float) nounwind readnone
 declare i32 @llvm.lrint.i32.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/AVR/llrint.ll b/llvm/test/CodeGen/AVR/llrint.ll
index 32b4c7ab12a4b..c55664f2d7353 100644
--- a/llvm/test/CodeGen/AVR/llrint.ll
+++ b/llvm/test/CodeGen/AVR/llrint.ll
@@ -1,6 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=avr -mcpu=atmega328p | FileCheck %s
 
+; FIXME: crash "Input type needs to be promoted!"
+; define i64 @testmsxh_builtin(half %x) {
+; entry:
+;   %0 = tail call i64 @llvm.llrint.f16(half %x)
+;   ret i64 %0
+; }
+
 define i64 @testmsxs_builtin(float %x) {
 ; CHECK-LABEL: testmsxs_builtin:
 ; CHECK:       ; %bb.0: ; %entry
@@ -21,5 +28,16 @@ entry:
   ret i64 %0
 }
 
+; FIXME(#44744): incorrect libcall
+define i64 @testmsxq_builtin(fp128 %x) {
+; CHECK-LABEL: testmsxq_builtin:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    call llrintl
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+  ret i64 %0
+}
+
 declare i64 @llvm.llrint.f32(float) nounwind readnone
 declare i64 @llvm.llrint.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/AVR/lrint.ll b/llvm/test/CodeGen/AVR/lrint.ll
index d7568305f7b51..4ef656060bd10 100644
--- a/llvm/test/CodeGen/AVR/lrint.ll
+++ b/llvm/test/CodeGen/AVR/lrint.ll
@@ -1,6 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=avr -mcpu=atmega328p | FileCheck %s
 
+; FIXME: crash "Input type needs to be promoted!"
+; define i32 @testmswh_builtin(half %x) {
+; entry:
+;   %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+;   ret i32 %0
+; }
+
 define i32 @testmsws_builtin(float %x) {
 ; CHECK-LABEL: testmsws_builtin:
 ; CHECK:       ; %bb.0: ; %entry
@@ -21,5 +28,16 @@ entry:
   ret i32 %0
 }
 
+; FIXME(#44744): incorrect libcall
+define i32 @testmswq_builtin(fp128 %x) {
+; CHECK-LABEL: testmswq_builtin:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    call lrint
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x)
+  ret i32 %0
+}
+
 declare i32 @llvm.lrint.i32.f32(float) nounwind readnone
 declare i32 @llvm.lrint.i32.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/LoongArch/lrint-conv.ll b/llvm/test/CodeGen/LoongArch/lrint-conv.ll
new file mode 100644
index 0000000000000..85de820025614
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lrint-conv.ll
@@ -0,0 +1,96 @@
+; Tests for lrint and llrint, with both i32 and i64 checked.
+
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=loongarch32 | FileCheck %s --check-prefixes=LA32
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=loongarch32 | FileCheck %s --check-prefixes=LA32
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I32
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=loongarch64 | FileCheck %s --check-prefixes=LA64-I64
+
+; FIXME: crash
+; define ITy @test_lrint_ixx_f16(half %x) nounwind {
+;   %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+;   ret ITy %res
+; }
+
+; define ITy @test_llrint_ixx_f16(half %x) nounwind {
+;   %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+;   ret ITy %res
+; }
+
+define ITy @test_lrint_ixx_f32(float %x) nounwind {
+; LA32-LABEL: test_lrint_ixx_f32:
+; LA32:         bl lrintf
+;
+; LA64-I32-LABEL: test_lrint_ixx_f32:
+; LA64-I32:         pcaddu18i $ra, %call36(lrintf)
+;
+; LA64-I64-LABEL: test_lrint_ixx_f32:
+; LA64-I64:         pcaddu18i $t8, %call36(lrintf)
+  %res = tail call ITy @llvm.lrint.ITy.f32(float %x)
+  ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f32(float %x) nounwind {
+; LA32-LABEL: test_llrint_ixx_f32:
+; LA32:         bl llrintf
+;
+; LA64-I32-LABEL: test_llrint_ixx_f32:
+; LA64-I32:         pcaddu18i $ra, %call36(llrintf)
+;
+; LA64-I64-LABEL: test_llrint_ixx_f32:
+; LA64-I64:         pcaddu18i $t8, %call36(llrintf)
+  %res = tail call ITy @llvm.llrint.ITy.f32(float %x)
+  ret ITy %res
+}
+
+define ITy @test_lrint_ixx_f64(double %x) nounwind {
+; LA32-LABEL: test_lrint_ixx_f64:
+; LA32:         bl lrint
+;
+; LA64-I32-LABEL: test_lrint_ixx_f64:
+; LA64-I32:         pcaddu18i $ra, %call36(lrint)
+;
+; LA64-I64-LABEL: test_lrint_ixx_f64:
+; LA64-I64:         pcaddu18i $t8, %call36(lrint)
+  %res = tail call ITy @llvm.lrint.ITy.f64(double %x)
+  ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f64(double %x) nounwind {
+; LA32-LABEL: test_llrint_ixx_f64:
+; LA32:         bl llrint
+;
+; LA64-I32-LABEL: test_llrint_ixx_f64:
+; LA64-I32:         pcaddu18i $ra, %call36(llrint)
+;
+; LA64-I64-LABEL: test_llrint_ixx_f64:
+; LA64-I64:         pcaddu18i $t8, %call36(llrint)
+  %res = tail call ITy @llvm.llrint.ITy.f64(double %x)
+  ret ITy %res
+}
+
+; FIXME(#44744): incorrect libcall on loongarch32
+define ITy @test_lrint_ixx_f128(fp128 %x) nounwind {
+; LA32-LABEL: test_lrint_ixx_f128:
+; LA32:         bl lrintl
+;
+; LA64-I32-LABEL: test_lrint_ixx_f128:
+; LA64-I32:         pcaddu18i $ra, %call36(lrintl)
+;
+; LA64-I64-LABEL: test_lrint_ixx_f128:
+; LA64-I64:         pcaddu18i $ra, %call36(lrintl)
+  %res = tail call ITy @llvm.lrint.ITy.f128(fp128 %x)
+  ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f128(fp128 %x) nounwind {
+; LA32-LABEL: test_llrint_ixx_f128:
+; LA32:         bl llrintl
+;
+; LA64-I32-LABEL: test_llrint_ixx_f128:
+; LA64-I32:         pcaddu18i $ra, %call36(llrintl)
+;
+; LA64-I64-LABEL: test_llrint_ixx_f128:
+; LA64-I64:         pcaddu18i $ra, %call36(llrintl)
+  %res = tail call ITy @llvm.llrint.ITy.f128(fp128 %x)
+  ret ITy %res
+}
diff --git a/llvm/test/CodeGen/MSP430/lrint-conv.ll b/llvm/test/CodeGen/MSP430/lrint-conv.ll
new file mode 100644
index 0000000000000..04ab2af6102a0
--- /dev/null
+++ b/llvm/test/CodeGen/MSP430/lrint-conv.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; Tests for lrint and llrint, with both i32 and i64 checked.
+
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=msp430-unknown-unknown | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=msp430-unknown-unknown | FileCheck %s --check-prefixes=CHECK
+
+; FIXME: crash "Input type needs to be promoted!"
+; define ITy @test_lrint_ixx_f16(half %x) nounwind {
+;   %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+;   ret ITy %res
+; }
+
+; define ITy @test_llrint_ixx_f16(half %x) nounwind {
+;   %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+;   ret ITy %res
+; }
+
+define ITy @test_lrint_ixx_f32(float %x) nounwind {
+; CHECK-LABEL: test_lrint_ixx_f32:
+; CHECK:         call #lrintf
+  %res = tail call ITy @llvm.lrint.ITy.f32(float %x)
+  ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f32(float %x) nounwind {
+; CHECK-LABEL: test_llrint_ixx_f32:
+; CHECK:         call #llrintf
+  %res = tail call ITy @llvm.llrint.ITy.f32(float %x)
+  ret ITy %res
+}
+
+define ITy @test_lrint_ixx_f64(double %x) nounwind {
+; CHECK-LABEL: test_lrint_ixx_f64:
+; CHECK:         call #lrint
+  %res = tail call ITy @llvm.lrint.ITy.f64(double %x)
+  ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f64(double %x) nounwind {
+; CHECK-LABEL: test_llrint_ixx_f64:
+; CHECK:         call #llrint
+  %res = tail call ITy @llvm.llrint.ITy.f64(double %x)
+  ret ITy %res
+}
+
+; FIXME(#44744): incorrect libcall
+define ITy @test_lrint_ixx_f128(fp128 %x) nounwind {
+; CHECK-LABEL: test_lrint_ixx_f128:
+; CHECK:         call #lrintl
+  %res = tail call ITy @llvm.lrint.ITy.f128(fp128 %x)
+  ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f128(fp128 %x) nounwind {
+; CHECK-LABEL: test_llrint_ixx_f128:
+; CHECK:         call #llrintl
+  %res = tail call ITy @llvm.llrint.ITy.f128(fp128 %x)
+  ret ITy %res
+}
diff --git a/llvm/test/CodeGen/Mips/llrint-conv.ll b/llvm/test/CodeGen/Mips/llrint-conv.ll
index dcb4e5657e80b..ee3c0d99253a6 100644
--- a/llvm/test/CodeGen/Mips/llrint-conv.ll
+++ b/llvm/test/CodeGen/Mips/llrint-conv.ll
@@ -1,4 +1,19 @@
 ; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s
+; RUN: llc < %s -mtriple=mips -mattr=+soft-float     | FileCheck %s
+
+; FIXME: crash
+; define signext i32 @testmswh(half %x) {
+; entry:
+;   %0 = tail call i64 @llvm.llrint.f16(half %x)
+;   %conv = trunc i64 %0 to i32
+;   ret i32 %conv
+; }
+
+; define i64 @testmsxh(half %x) {
+; entry:
+;   %0 = tail call i64 @llvm.llrint.f16(half %x)
+;   ret i64 %0
+; }
 
 define signext i32 @testmsws(float %x) {
 ; CHECK-LABEL: testmsws:
diff --git a/llvm/test/CodeGen/Mips/lrint-conv.ll b/llvm/test/CodeGen/Mips/lrint-conv.ll
index bd3f7b3babe10..6d2e392675f1c 100644
--- a/llvm/test/CodeGen/Mips/lrint-conv.ll
+++ b/llvm/test/CodeGen/Mips/lrint-conv.ll
@@ -1,4 +1,19 @@
 ; RUN: llc < %s -mtriple=mips64el -mattr=+soft-float | FileCheck %s
+; RUN: llc < %s -mtriple=mips -mattr=+soft-float     | FileCheck %s
+
+; FIXME: crash
+; define signext i32 @testmswh(half %x) {
+; entry:
+;   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+;   %conv = trunc i64 %0 to i32
+;   ret i32 %conv
+; }
+
+; define i64 @testmsxh(half %x) {
+; entry:
+;   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+;   ret i64 %0
+; }
 
 define signext i32 @testmsws(float %x) {
 ; CHECK-LABEL: testmsws:
diff --git a/llvm/test/CodeGen/PowerPC/llrint-conv.ll b/llvm/test/CodeGen/PowerPC/llrint-conv.ll
index daadf85b4085a..dcd3bd25a83c5 100644
--- a/llvm/test/CodeGen/PowerPC/llrint-conv.ll
+++ b/llvm/test/CodeGen/PowerPC/llrint-conv.ll
@@ -1,4 +1,19 @@
 ; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc | FileCheck %s
+
+; FIXME: crash "Input type needs to be promoted!"
+; define signext i32 @testmswh(half %x) {
+; entry:
+;   %0 = tail call i64 @llvm.llrint.f16(half %x)
+;   %conv = trunc i64 %0 to i32
+;   ret i32 %conv
+; }
+
+; define i64 @testmsxh(half %x) {
+; entry:
+;   %0 = tail call i64 @llvm.llrint.f16(half %x)
+;   ret i64 %0
+; }
 
 ; CHECK-LABEL: testmsws:
 ; CHECK:       bl      llrintf
@@ -51,6 +66,23 @@ entry:
   ret i64 %0
 }
 
+; CHECK-LABEL: testmswq:
+; CHECK:       bl      llrintf128
+define signext i32 @testmswq(fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmslq:
+; CHECK:       bl      llrintf128
+define i64 @testmslq(fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+  ret i64 %0
+}
+
 declare i64 @llvm.llrint.f32(float) nounwind readnone
 declare i64 @llvm.llrint.f64(double) nounwind readnone
 declare i64 @llvm.llrint.ppcf128(ppc_fp128) nounwind readnone
diff --git a/llvm/test/CodeGen/PowerPC/lrint-conv.ll b/llvm/test/CodeGen/PowerPC/lrint-conv.ll
index adfc994497323..bc77a200757f4 100644
--- a/llvm/test/CodeGen/PowerPC/lrint-conv.ll
+++ b/llvm/test/CodeGen/PowerPC/lrint-conv.ll
@@ -1,4 +1,19 @@
 ; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc | FileCheck %s
+
+; FIXME: crash "Input type needs to be promoted!"
+; define signext i32 @testmswh(half %x) {
+; entry:
+;   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+;   %conv = trunc i64 %0 to i32
+;   ret i32 %conv
+; }
+
+; define i64 @testmsxh(half %x) {
+; entry:
+;   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+;   ret i64 %0
+; }
 
 ; CHECK-LABEL: testmsws:
 ; CHECK:       bl      lrintf
@@ -51,6 +66,23 @@ entry:
   ret i64 %0
 }
 
+; CHECK-LABEL: testmswq:
+; CHECK:       bl      lrintf128
+define signext i32 @testmswq(fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
+  %conv = trunc i64 %0 to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmslq:
+; CHECK:       bl      lrintf128
+define i64 @testmslq(fp128 %x) {
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
+  ret i64 %0
+}
+
 declare i64 @llvm.lrint.i64.f32(float) nounwind readnone
 declare i64 @llvm.lrint.i64.f64(double) nounwind readnone
 declare i64 @llvm.lrint.i64.ppcf128(ppc_fp128) nounwind readnone
diff --git a/llvm/test/CodeGen/RISCV/lrint-conv.ll b/llvm/test/CodeGen/RISCV/lrint-conv.ll
new file mode 100644
index 0000000000000..d3af2153588a1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/lrint-conv.ll
@@ -0,0 +1,76 @@
+; Tests for lrint and llrint, with both i32 and i64 checked.
+
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=riscv32 | FileCheck %s --check-prefixes=RV32
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=RV64
+
+; FIXME: crash
+; define ITy @test_lrint_ixx_f16(half %x) nounwind {
+;   %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+; }
+
+; define ITy @test_llrint_ixx_f16(half %x) nounwind {
+;   %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+; }
+
+define ITy @test_lrint_ixx_f32(float %x) nounwind {
+; RV32-LABEL: test_lrint_ixx_f32:
+; RV32:         call lrintf
+;
+; RV64-LABEL: test_lrint_ixx_f32:
+; RV64:         call lrintf
+  %res = tail call ITy @llvm.lrint.ITy.f32(float %x)
+  ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f32(float %x) nounwind {
+; RV32-LABEL: test_llrint_ixx_f32:
+; RV32:         call llrintf
+;
+; RV64-LABEL: test_llrint_ixx_f32:
+; RV64:         call llrintf
+  %res = tail call ITy @llvm.llrint.ITy.f32(float %x)
+  ret ITy %res
+}
+
+define ITy @test_lrint_ixx_f64(double %x) nounwind {
+; RV32-LABEL: test_lrint_ixx_f64:
+; RV32:         call lrint
+;
+; RV64-LABEL: test_lrint_ixx_f64:
+; RV64:         call lrint
+  %res = tail call ITy @llvm.lrint.ITy.f64(double %x)
+  ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f64(double %x) nounwind {
+; RV32-LABEL: test_llrint_ixx_f64:
+; RV32:         call llrint
+;
+; RV64-LABEL: test_llrint_ixx_f64:
+; RV64:         call llrint
+  %res = tail call ITy @llvm.llrint.ITy.f64(double %x)
+  ret ITy %res
+}
+
+; FIXME(#44744): incorrect libcall on riscv32
+define ITy @test_lrint_ixx_f128(fp128 %x) nounwind {
+; RV32-LABEL: test_lrint_ixx_f128:
+; RV32:         call lrintl
+;
+; RV64-LABEL: test_lrint_ixx_f128:
+; RV64:         call lrintl
+  %res = tail call ITy @llvm.lrint.ITy.f128(fp128 %x)
+  ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f128(fp128 %x) nounwind {
+; RV32-LABEL: test_llrint_ixx_f128:
+; RV32:         call llrintl
+;
+; RV64-LABEL: test_llrint_ixx_f128:
+; RV64:         call llrintl
+  %res = tail call ITy @llvm.llrint.ITy.f128(fp128 %x)
+  ret ITy %res
+}
diff --git a/llvm/test/CodeGen/SPARC/lrint-conv.ll b/llvm/test/CodeGen/SPARC/lrint-conv.ll
new file mode 100644
index 0000000000000..81934114f548f
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/lrint-conv.ll
@@ -0,0 +1,68 @@
+; Tests for lrint and llrint, with both i32 and i64 checked.
+
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=sparc   | FileCheck %s --check-prefixes=SPARC32
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=sparc   | FileCheck %s --check-prefixes=SPARC32
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=sparc64 | FileCheck %s --check-prefixes=SPARC64
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=sparc64 | FileCheck %s --check-prefixes=SPARC64
+
+; FIXME: crash "Input type needs to be promoted!"
+; define ITy @test_lrint_ixx_f16(half %x) nounwind {
+;   %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+;   ret ITy %res
+; }
+
+; define ITy @test_llrint_ixx_f16(half %x) nounwind {
+;   %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+;   ret ITy %res
+; }
+
+define ITy @test_lrint_ixx_f32(float %x) nounwind {
+; SPARC32-LABEL: test_lrint_ixx_f32:
+; SPARC32:         call lrintf
+;
+; SPARC64-LABEL: test_lrint_ixx_f32:
+; SPARC64:         call lrintf
+  %res = tail call ITy @llvm.lrint.ITy.f32(float %x)
+  ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f32(float %x) nounwind {
+; SPARC32-LABEL: test_llrint_ixx_f32:
+; SPARC32:         call llrintf
+;
+; SPARC64-LABEL: test_llrint_ixx_f32:
+; SPARC64:         call llrintf
+  %res = tail call ITy @llvm.llrint.ITy.f32(float %x)
+  ret ITy %res
+}
+
+define ITy @test_lrint_ixx_f64(double %x) nounwind {
+; SPARC32-LABEL: test_lrint_ixx_f64:
+; SPARC32:         call lrint
+;
+; SPARC64-LABEL: test_lrint_ixx_f64:
+; SPARC64:         call lrint
+  %res = tail call ITy @llvm.lrint.ITy.f64(double %x)
+  ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f64(double %x) nounwind {
+; SPARC32-LABEL: test_llrint_ixx_f64:
+; SPARC32:         call llrint
+;
+; SPARC64-LABEL: test_llrint_ixx_f64:
+; SPARC64:         call llrint
+  %res = tail call ITy @llvm.llrint.ITy.f64(double %x)
+  ret ITy %res
+}
+
+; FIXME(#41838): unsupported type
+; define ITy @test_lrint_ixx_f128(fp128 %x) nounwind {
+;   %res = tail call ITy @llvm.lrint.ITy.f128(fp128 %x)
+;   ret ITy %res
+; }
+
+; define ITy @test_llrint_ixx_f128(fp128 %x) nounwind {
+;   %res = tail call ITy @llvm.llrint.ITy.f128(fp128 %x)
+;   ret ITy %res
+; }
diff --git a/llvm/test/CodeGen/WebAssembly/lrint-conv.ll b/llvm/test/CodeGen/WebAssembly/lrint-conv.ll
new file mode 100644
index 0000000000000..0571150cb3505
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/lrint-conv.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; Tests for lrint and llrint, with both i32 and i64 checked.
+
+; RUN: sed 's/ITy/i32/g' %s | llc -mtriple=wasm32-unknown-unknown | FileCheck %s
+; RUN: sed 's/ITy/i64/g' %s | llc -mtriple=wasm32-unknown-unknown | FileCheck %s
+
+define ITy @test_lrint_ixx_f16(half %x) nounwind {
+; CHECK-LABEL: test_lrint_ixx_f16:
+; CHECK:         call lrintf
+  %res = tail call ITy @llvm.lrint.ITy.f16(half %x)
+  ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f16(half %x) nounwind {
+; CHECK-LABEL: test_llrint_ixx_f16:
+; CHECK:         call llrintf
+  %res = tail call ITy @llvm.llrint.ITy.f16(half %x)
+  ret ITy %res
+}
+
+define ITy @test_lrint_ixx_f32(float %x) nounwind {
+; CHECK-LABEL: test_lrint_ixx_f32:
+; CHECK:         call lrintf
+  %res = tail call ITy @llvm.lrint.ITy.f32(float %x)
+  ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f32(float %x) nounwind {
+; CHECK-LABEL: test_llrint_ixx_f32:
+; CHECK:         call llrintf
+  %res = tail call ITy @llvm.llrint.ITy.f32(float %x)
+  ret ITy %res
+}
+
+define ITy @test_lrint_ixx_f64(double %x) nounwind {
+; CHECK-LABEL: test_lrint_ixx_f64:
+; CHECK:         call lrint
+  %res = tail call ITy @llvm.lrint.ITy.f64(double %x)
+  ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f64(double %x) nounwind {
+; CHECK-LABEL: test_llrint_ixx_f64:
+; CHECK:         call llrint
+  %res = tail call ITy @llvm.llrint.ITy.f64(double %x)
+  ret ITy %res
+}
+
+define ITy @test_lrint_ixx_f128(fp128 %x) nounwind {
+; CHECK-LABEL: test_lrint_ixx_f128:
+; CHECK:         call lrintl
+  %res = tail call ITy @llvm.lrint.ITy.f128(fp128 %x)
+  ret ITy %res
+}
+
+define ITy @test_llrint_ixx_f128(fp128 %x) nounwind {
+; CHECK-LABEL: test_llrint_ixx_f128:
+; CHECK:         call llrintl
+  %res = tail call ITy @llvm.llrint.ITy.f128(fp128 %x)
+  ret ITy %res
+}
diff --git a/llvm/test/CodeGen/X86/llrint-conv.ll b/llvm/test/CodeGen/X86/llrint-conv.ll
index 402daf80a15e8..d3eca5197a94b 100644
--- a/llvm/test/CodeGen/X86/llrint-conv.ll
+++ b/llvm/test/CodeGen/X86/llrint-conv.ll
@@ -7,14 +7,50 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
 
-define i64 @testmsxs(float %x) {
+define i64 @testmsxh(half %x) nounwind {
+; X86-NOSSE-LABEL: testmsxh:
+; X86-NOSSE:       # %bb.0: # %entry
+; X86-NOSSE-NEXT:    pushl %eax
+; X86-NOSSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    movl %eax, (%esp)
+; X86-NOSSE-NEXT:    calll __extendhfsf2
+; X86-NOSSE-NEXT:    fstps (%esp)
+; X86-NOSSE-NEXT:    calll llrintf
+; X86-NOSSE-NEXT:    popl %ecx
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE2-LABEL: testmsxh:
+; X86-SSE2:       # %bb.0: # %entry
+; X86-SSE2-NEXT:    pushl %eax
+; X86-SSE2-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
+; X86-SSE2-NEXT:    pextrw $0, %xmm0, %eax
+; X86-SSE2-NEXT:    movw %ax, (%esp)
+; X86-SSE2-NEXT:    calll __extendhfsf2
+; X86-SSE2-NEXT:    fstps (%esp)
+; X86-SSE2-NEXT:    calll llrintf
+; X86-SSE2-NEXT:    popl %ecx
+; X86-SSE2-NEXT:    retl
+;
+; X64-SSE-LABEL: testmsxh:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    callq __extendhfsf2 at PLT
+; X64-SSE-NEXT:    callq rintf at PLT
+; X64-SSE-NEXT:    callq __truncsfhf2 at PLT
+; X64-SSE-NEXT:    callq __extendhfsf2 at PLT
+; X64-SSE-NEXT:    cvttss2si %xmm0, %rax
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.llrint.f16(half %x)
+  ret i64 %0
+}
+
+define i64 @testmsxs(float %x) nounwind {
 ; X86-NOSSE-LABEL: testmsxs:
 ; X86-NOSSE:       # %bb.0: # %entry
 ; X86-NOSSE-NEXT:    pushl %ebp
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT:    .cfi_offset %ebp, -8
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
-; X86-NOSSE-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NOSSE-NEXT:    andl $-8, %esp
 ; X86-NOSSE-NEXT:    subl $8, %esp
 ; X86-NOSSE-NEXT:    flds 8(%ebp)
@@ -23,16 +59,12 @@ define i64 @testmsxs(float %x) {
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOSSE-NEXT:    movl %ebp, %esp
 ; X86-NOSSE-NEXT:    popl %ebp
-; X86-NOSSE-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NOSSE-NEXT:    retl
 ;
 ; X86-SSE2-LABEL: testmsxs:
 ; X86-SSE2:       # %bb.0: # %entry
 ; X86-SSE2-NEXT:    pushl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
 ; X86-SSE2-NEXT:    movl %esp, %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-SSE2-NEXT:    andl $-8, %esp
 ; X86-SSE2-NEXT:    subl $8, %esp
 ; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -43,16 +75,12 @@ define i64 @testmsxs(float %x) {
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    movl %ebp, %esp
 ; X86-SSE2-NEXT:    popl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-AVX-LABEL: testmsxs:
 ; X86-AVX:       # %bb.0: # %entry
 ; X86-AVX-NEXT:    pushl %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %ebp, -8
 ; X86-AVX-NEXT:    movl %esp, %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-AVX-NEXT:    andl $-8, %esp
 ; X86-AVX-NEXT:    subl $8, %esp
 ; X86-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -63,7 +91,6 @@ define i64 @testmsxs(float %x) {
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl %ebp, %esp
 ; X86-AVX-NEXT:    popl %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: testmsxs:
@@ -80,14 +107,11 @@ entry:
   ret i64 %0
 }
 
-define i64 @testmsxd(double %x) {
+define i64 @testmsxd(double %x) nounwind {
 ; X86-NOSSE-LABEL: testmsxd:
 ; X86-NOSSE:       # %bb.0: # %entry
 ; X86-NOSSE-NEXT:    pushl %ebp
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
-; X86-NOSSE-NEXT:    .cfi_offset %ebp, -8
 ; X86-NOSSE-NEXT:    movl %esp, %ebp
-; X86-NOSSE-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NOSSE-NEXT:    andl $-8, %esp
 ; X86-NOSSE-NEXT:    subl $8, %esp
 ; X86-NOSSE-NEXT:    fldl 8(%ebp)
@@ -96,16 +120,12 @@ define i64 @testmsxd(double %x) {
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOSSE-NEXT:    movl %ebp, %esp
 ; X86-NOSSE-NEXT:    popl %ebp
-; X86-NOSSE-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NOSSE-NEXT:    retl
 ;
 ; X86-SSE2-LABEL: testmsxd:
 ; X86-SSE2:       # %bb.0: # %entry
 ; X86-SSE2-NEXT:    pushl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
 ; X86-SSE2-NEXT:    movl %esp, %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-SSE2-NEXT:    andl $-8, %esp
 ; X86-SSE2-NEXT:    subl $8, %esp
 ; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
@@ -116,16 +136,12 @@ define i64 @testmsxd(double %x) {
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-SSE2-NEXT:    movl %ebp, %esp
 ; X86-SSE2-NEXT:    popl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-AVX-LABEL: testmsxd:
 ; X86-AVX:       # %bb.0: # %entry
 ; X86-AVX-NEXT:    pushl %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %ebp, -8
 ; X86-AVX-NEXT:    movl %esp, %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-AVX-NEXT:    andl $-8, %esp
 ; X86-AVX-NEXT:    subl $8, %esp
 ; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
@@ -136,7 +152,6 @@ define i64 @testmsxd(double %x) {
 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-AVX-NEXT:    movl %ebp, %esp
 ; X86-AVX-NEXT:    popl %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-SSE-LABEL: testmsxd:
@@ -153,14 +168,11 @@ entry:
   ret i64 %0
 }
 
-define i64 @testmsll(x86_fp80 %x) {
+define i64 @testmsll(x86_fp80 %x) nounwind {
 ; X86-LABEL: testmsll:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
 ; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
 ; X86-NEXT:    fldt 8(%ebp)
@@ -169,7 +181,6 @@ define i64 @testmsll(x86_fp80 %x) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl %ebp, %esp
 ; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: testmsll:
@@ -183,6 +194,61 @@ entry:
   ret i64 %0
 }
 
+; FIXME(#44744): incorrect libcall
+define i64 @testmslq(fp128 %x) nounwind {
+; X86-NOSSE-LABEL: testmslq:
+; X86-NOSSE:       # %bb.0: # %entry
+; X86-NOSSE-NEXT:    pushl %ebp
+; X86-NOSSE-NEXT:    movl %esp, %ebp
+; X86-NOSSE-NEXT:    andl $-16, %esp
+; X86-NOSSE-NEXT:    subl $16, %esp
+; X86-NOSSE-NEXT:    pushl 20(%ebp)
+; X86-NOSSE-NEXT:    pushl 16(%ebp)
+; X86-NOSSE-NEXT:    pushl 12(%ebp)
+; X86-NOSSE-NEXT:    pushl 8(%ebp)
+; X86-NOSSE-NEXT:    calll llrintl
+; X86-NOSSE-NEXT:    addl $16, %esp
+; X86-NOSSE-NEXT:    movl %ebp, %esp
+; X86-NOSSE-NEXT:    popl %ebp
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE2-LABEL: testmslq:
+; X86-SSE2:       # %bb.0: # %entry
+; X86-SSE2-NEXT:    pushl %ebp
+; X86-SSE2-NEXT:    movl %esp, %ebp
+; X86-SSE2-NEXT:    andl $-16, %esp
+; X86-SSE2-NEXT:    subl $16, %esp
+; X86-SSE2-NEXT:    pushl 20(%ebp)
+; X86-SSE2-NEXT:    pushl 16(%ebp)
+; X86-SSE2-NEXT:    pushl 12(%ebp)
+; X86-SSE2-NEXT:    pushl 8(%ebp)
+; X86-SSE2-NEXT:    calll llrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movl %ebp, %esp
+; X86-SSE2-NEXT:    popl %ebp
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: testmslq:
+; X86-AVX:       # %bb.0: # %entry
+; X86-AVX-NEXT:    pushl %ebp
+; X86-AVX-NEXT:    movl %esp, %ebp
+; X86-AVX-NEXT:    andl $-16, %esp
+; X86-AVX-NEXT:    subl $32, %esp
+; X86-AVX-NEXT:    vmovups 8(%ebp), %xmm0
+; X86-AVX-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX-NEXT:    calll llrintl
+; X86-AVX-NEXT:    movl %ebp, %esp
+; X86-AVX-NEXT:    popl %ebp
+; X86-AVX-NEXT:    retl
+;
+; X64-LABEL: testmslq:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    jmp llrintl at PLT # TAILCALL
+entry:
+  %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+  ret i64 %0
+}
+
 declare i64 @llvm.llrint.f32(float) nounwind readnone
 declare i64 @llvm.llrint.f64(double) nounwind readnone
 declare i64 @llvm.llrint.f80(x86_fp80) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/lrint-conv-i32.ll b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
index 21580f53ec9b3..3c50aea1095f4 100644
--- a/llvm/test/CodeGen/X86/lrint-conv-i32.ll
+++ b/llvm/test/CodeGen/X86/lrint-conv-i32.ll
@@ -7,16 +7,21 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
 
-define i32 @testmsws(float %x) {
+; FIXME: crash
+; define i32 @testmswh(half %x) nounwind {
+; entry:
+;   %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+;   ret i32 %0
+; }
+
+define i32 @testmsws(float %x) nounwind {
 ; X86-NOSSE-LABEL: testmsws:
 ; X86-NOSSE:       # %bb.0: # %entry
 ; X86-NOSSE-NEXT:    pushl %eax
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    fistpl (%esp)
 ; X86-NOSSE-NEXT:    movl (%esp), %eax
 ; X86-NOSSE-NEXT:    popl %ecx
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NOSSE-NEXT:    retl
 ;
 ; X86-SSE2-LABEL: testmsws:
@@ -43,16 +48,14 @@ entry:
   ret i32 %0
 }
 
-define i32 @testmswd(double %x) {
+define i32 @testmswd(double %x) nounwind {
 ; X86-NOSSE-LABEL: testmswd:
 ; X86-NOSSE:       # %bb.0: # %entry
 ; X86-NOSSE-NEXT:    pushl %eax
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X86-NOSSE-NEXT:    fistpl (%esp)
 ; X86-NOSSE-NEXT:    movl (%esp), %eax
 ; X86-NOSSE-NEXT:    popl %ecx
-; X86-NOSSE-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NOSSE-NEXT:    retl
 ;
 ; X86-SSE2-LABEL: testmswd:
@@ -79,16 +82,14 @@ entry:
   ret i32 %0
 }
 
-define i32 @testmsll(x86_fp80 %x) {
+define i32 @testmsll(x86_fp80 %x) nounwind {
 ; X86-LABEL: testmsll:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    pushl %eax
-; X86-NEXT:    .cfi_def_cfa_offset 8
 ; X86-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X86-NEXT:    fistpl (%esp)
 ; X86-NEXT:    movl (%esp), %eax
 ; X86-NEXT:    popl %ecx
-; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: testmsll:
@@ -102,6 +103,61 @@ entry:
   ret i32 %0
 }
 
+; FIXME(#44744): incorrect libcall
+define i32 @testmswq(fp128 %x) nounwind {
+; X86-NOSSE-LABEL: testmswq:
+; X86-NOSSE:       # %bb.0: # %entry
+; X86-NOSSE-NEXT:    pushl %ebp
+; X86-NOSSE-NEXT:    movl %esp, %ebp
+; X86-NOSSE-NEXT:    andl $-16, %esp
+; X86-NOSSE-NEXT:    subl $16, %esp
+; X86-NOSSE-NEXT:    pushl 20(%ebp)
+; X86-NOSSE-NEXT:    pushl 16(%ebp)
+; X86-NOSSE-NEXT:    pushl 12(%ebp)
+; X86-NOSSE-NEXT:    pushl 8(%ebp)
+; X86-NOSSE-NEXT:    calll lrintl
+; X86-NOSSE-NEXT:    addl $16, %esp
+; X86-NOSSE-NEXT:    movl %ebp, %esp
+; X86-NOSSE-NEXT:    popl %ebp
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE2-LABEL: testmswq:
+; X86-SSE2:       # %bb.0: # %entry
+; X86-SSE2-NEXT:    pushl %ebp
+; X86-SSE2-NEXT:    movl %esp, %ebp
+; X86-SSE2-NEXT:    andl $-16, %esp
+; X86-SSE2-NEXT:    subl $16, %esp
+; X86-SSE2-NEXT:    pushl 20(%ebp)
+; X86-SSE2-NEXT:    pushl 16(%ebp)
+; X86-SSE2-NEXT:    pushl 12(%ebp)
+; X86-SSE2-NEXT:    pushl 8(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movl %ebp, %esp
+; X86-SSE2-NEXT:    popl %ebp
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: testmswq:
+; X86-AVX:       # %bb.0: # %entry
+; X86-AVX-NEXT:    pushl %ebp
+; X86-AVX-NEXT:    movl %esp, %ebp
+; X86-AVX-NEXT:    andl $-16, %esp
+; X86-AVX-NEXT:    subl $32, %esp
+; X86-AVX-NEXT:    vmovups 8(%ebp), %xmm0
+; X86-AVX-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX-NEXT:    calll lrintl
+; X86-AVX-NEXT:    movl %ebp, %esp
+; X86-AVX-NEXT:    popl %ebp
+; X86-AVX-NEXT:    retl
+;
+; X64-LABEL: testmswq:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    jmp lrintl@PLT # TAILCALL
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x)
+  ret i32 %0
+}
+
 declare i32 @llvm.lrint.i32.f32(float) nounwind readnone
 declare i32 @llvm.lrint.i32.f64(double) nounwind readnone
 declare i32 @llvm.lrint.i32.f80(x86_fp80) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/lrint-conv-i64.ll b/llvm/test/CodeGen/X86/lrint-conv-i64.ll
index 38fa09085e189..2ba1500df0b6e 100644
--- a/llvm/test/CodeGen/X86/lrint-conv-i64.ll
+++ b/llvm/test/CodeGen/X86/lrint-conv-i64.ll
@@ -3,7 +3,23 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX
 
-define i64 @testmsxs(float %x) {
+define i64 @testmsxh(half %x) nounwind {
+; SSE-LABEL: testmsxh:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    pushq %rax
+; SSE-NEXT:    callq __extendhfsf2@PLT
+; SSE-NEXT:    callq rintf@PLT
+; SSE-NEXT:    callq __truncsfhf2@PLT
+; SSE-NEXT:    callq __extendhfsf2@PLT
+; SSE-NEXT:    cvttss2si %xmm0, %rax
+; SSE-NEXT:    popq %rcx
+; SSE-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
+  ret i64 %0
+}
+
+define i64 @testmsxs(float %x) nounwind {
 ; SSE-LABEL: testmsxs:
 ; SSE:       # %bb.0: # %entry
 ; SSE-NEXT:    cvtss2si %xmm0, %rax
@@ -18,7 +34,7 @@ entry:
   ret i64 %0
 }
 
-define i64 @testmsxd(double %x) {
+define i64 @testmsxd(double %x) nounwind {
 ; SSE-LABEL: testmsxd:
 ; SSE:       # %bb.0: # %entry
 ; SSE-NEXT:    cvtsd2si %xmm0, %rax
@@ -33,7 +49,7 @@ entry:
   ret i64 %0
 }
 
-define i64 @testmsll(x86_fp80 %x) {
+define i64 @testmsll(x86_fp80 %x) nounwind {
 ; CHECK-LABEL: testmsll:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
@@ -45,7 +61,17 @@ entry:
   ret i64 %0
 }
 
-define i32 @PR125324(float %x) {
+; FIXME(#44744): incorrect libcall
+define i64 @testmsxq(fp128 %x) nounwind {
+; CHECK-LABEL: testmsxq:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    jmp lrintl@PLT # TAILCALL
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x)
+  ret i64 %0
+}
+
+define i32 @PR125324(float %x) nounwind {
 ; SSE-LABEL: PR125324:
 ; SSE:       # %bb.0: # %entry
 ; SSE-NEXT:    cvtss2si %xmm0, %rax

>From 6ab0fe9c09e317f6c0c8cf80c37c7ca93c3196fb Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 8 Aug 2025 06:26:56 -0500
Subject: [PATCH 2/7] Update existing vector tests

---
 llvm/test/CodeGen/PowerPC/vector-llrint.ll |   4 +
 llvm/test/CodeGen/PowerPC/vector-lrint.ll  |   8 +
 llvm/test/CodeGen/X86/vector-llrint-f16.ll |   3 +
 llvm/test/CodeGen/X86/vector-llrint.ll     | 573 ++++++++++++++++++
 llvm/test/CodeGen/X86/vector-lrint.ll      | 650 +++++++++++++++++++++
 5 files changed, 1238 insertions(+)

diff --git a/llvm/test/CodeGen/PowerPC/vector-llrint.ll b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
index 9229fefced67e..7085cf51916da 100644
--- a/llvm/test/CodeGen/PowerPC/vector-llrint.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
@@ -1,4 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; FIXME: crash "Input type needs to be promoted!"
+; SKIP: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; SKIP:   -mtriple=powerpc-unknown-unknown -verify-machineinstrs < %s | \
+; SKIP:   FileCheck %s --check-prefix=PPC32
 ; RUN: llc -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
 ; RUN:   -mtriple=powerpc64-unknown-unknown -verify-machineinstrs < %s | \
 ; RUN:   FileCheck %s --check-prefix=BE
diff --git a/llvm/test/CodeGen/PowerPC/vector-lrint.ll b/llvm/test/CodeGen/PowerPC/vector-lrint.ll
index c2576d4631db8..b2ade5300dbc3 100644
--- a/llvm/test/CodeGen/PowerPC/vector-lrint.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-lrint.ll
@@ -1,4 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; FIXME: crash "Input type needs to be promoted!"
+; SKIP: sed 's/iXLen/i32/g' %s | llc -ppc-asm-full-reg-names \
+; SKIP:   -ppc-vsr-nums-as-vr -mtriple=powerpc-unknown-unknown \
+; SKIP:   -verify-machineinstrs | FileCheck %s --check-prefixes=PPC32
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mcpu=pwr7 -ppc-asm-full-reg-names \
 ; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown \
 ; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=BE
@@ -9,6 +13,10 @@
 ; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown \
 ; RUN:   -verify-machineinstrs --enable-unsafe-fp-math | \
 ; RUN:   FileCheck %s --check-prefixes=FAST
+; FIXME: crash "Input type needs to be promoted!"
+; SKIP: sed 's/iXLen/i64/g' %s | llc -ppc-asm-full-reg-names \
+; SKIP:   -ppc-vsr-nums-as-vr -mtriple=powerpc-unknown-unknown \
+; SKIP:   -verify-machineinstrs | FileCheck %s --check-prefixes=PPC32
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mcpu=pwr7 -ppc-asm-full-reg-names \
 ; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown \
 ; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=BE
diff --git a/llvm/test/CodeGen/X86/vector-llrint-f16.ll b/llvm/test/CodeGen/X86/vector-llrint-f16.ll
index 5e5c5849fc22e..d6a21e1c00502 100644
--- a/llvm/test/CodeGen/X86/vector-llrint-f16.ll
+++ b/llvm/test/CodeGen/X86/vector-llrint-f16.ll
@@ -1,4 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; FIXME: crash "Do not know how to split the result of this operator!"
+; SKIP: sed 's/XRINT/lrint/g' %s | llc -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
+; SKIP: sed 's/XRINT/llrint/g' %s | llc -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
 ; RUN: sed 's/XRINT/lrint/g' %s | llc -mtriple=x86_64-unknown -mattr=avx2,f16c | FileCheck %s --check-prefix=AVX
 ; RUN: sed 's/XRINT/llrint/g' %s | llc -mtriple=x86_64-unknown -mattr=avx2,f16c | FileCheck %s --check-prefix=AVX
 ; RUN: sed 's/XRINT/lrint/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefix=FP16
diff --git a/llvm/test/CodeGen/X86/vector-llrint.ll b/llvm/test/CodeGen/X86/vector-llrint.ll
index 7017eb60df41d..08ee748497650 100644
--- a/llvm/test/CodeGen/X86/vector-llrint.ll
+++ b/llvm/test/CodeGen/X86/vector-llrint.ll
@@ -1,10 +1,29 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
 ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=SSE
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=AVX512DQ
 
 define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+; X86-LABEL: llrint_v1i64_v1f32:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    flds 8(%ebp)
+; X86-NEXT:    fistpll (%esp)
+; X86-NEXT:    movl (%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl
+;
 ; SSE-LABEL: llrint_v1i64_v1f32:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cvtss2si %xmm0, %rax
@@ -25,6 +44,39 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
 declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
 
 define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+; X86-LABEL: llrint_v2i64_v2f32:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    .cfi_offset %esi, -16
+; X86-NEXT:    .cfi_offset %edi, -12
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    flds 16(%ebp)
+; X86-NEXT:    flds 12(%ebp)
+; X86-NEXT:    fistpll (%esp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    movl (%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    movl %esi, 8(%eax)
+; X86-NEXT:    movl %edx, 4(%eax)
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    leal -8(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl $4
+;
 ; SSE-LABEL: llrint_v2i64_v2f32:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cvtss2si %xmm0, %rax
@@ -56,6 +108,60 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
 declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
 
 define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+; X86-LABEL: llrint_v4i64_v4f32:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $56, %esp
+; X86-NEXT:    .cfi_offset %esi, -20
+; X86-NEXT:    .cfi_offset %edi, -16
+; X86-NEXT:    .cfi_offset %ebx, -12
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    flds 24(%ebp)
+; X86-NEXT:    flds 20(%ebp)
+; X86-NEXT:    flds 16(%ebp)
+; X86-NEXT:    flds 12(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, 28(%eax)
+; X86-NEXT:    movl %ecx, 24(%eax)
+; X86-NEXT:    movl %edx, 20(%eax)
+; X86-NEXT:    movl %ebx, 16(%eax)
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 8(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl $4
+;
 ; SSE-LABEL: llrint_v4i64_v4f32:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cvtss2si %xmm0, %rax
@@ -122,6 +228,100 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
 declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
 
 define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+; X86-LABEL: llrint_v8i64_v8f32:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $120, %esp
+; X86-NEXT:    .cfi_offset %esi, -20
+; X86-NEXT:    .cfi_offset %edi, -16
+; X86-NEXT:    .cfi_offset %ebx, -12
+; X86-NEXT:    flds 12(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 16(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 20(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 24(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 28(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 32(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 36(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 40(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, 60(%eax)
+; X86-NEXT:    movl %ecx, 56(%eax)
+; X86-NEXT:    movl %edx, 52(%eax)
+; X86-NEXT:    movl %esi, 48(%eax)
+; X86-NEXT:    movl %edi, 44(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 40(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 36(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 32(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 28(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 24(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 20(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 16(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 8(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl $4
+;
 ; SSE-LABEL: llrint_v8i64_v8f32:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movaps %xmm0, %xmm2
@@ -236,6 +436,180 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
 declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
 
 define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+; X86-LABEL: llrint_v16i64_v16f32:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $248, %esp
+; X86-NEXT:    .cfi_offset %esi, -20
+; X86-NEXT:    .cfi_offset %edi, -16
+; X86-NEXT:    .cfi_offset %ebx, -12
+; X86-NEXT:    flds 12(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 16(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 20(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 24(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 28(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 32(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 36(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 40(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 44(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 48(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 52(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 56(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 60(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 64(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 68(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    flds 72(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, 124(%eax)
+; X86-NEXT:    movl %ecx, 120(%eax)
+; X86-NEXT:    movl %edx, 116(%eax)
+; X86-NEXT:    movl %esi, 112(%eax)
+; X86-NEXT:    movl %edi, 108(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 104(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 100(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 96(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 92(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 88(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 84(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 80(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 76(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 72(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 68(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 64(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 60(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 56(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 52(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 48(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 44(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 40(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 36(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 32(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 28(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 24(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 20(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 16(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 8(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl $4
+;
 ; SSE-LABEL: llrint_v16i64_v16f32:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movq %rdi, %rax
@@ -452,6 +826,24 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
 declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
 
 define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
+; X86-LABEL: llrint_v1i64_v1f64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $8, %esp
+; X86-NEXT:    fldl 8(%ebp)
+; X86-NEXT:    fistpll (%esp)
+; X86-NEXT:    movl (%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl
+;
 ; SSE-LABEL: llrint_v1i64_v1f64:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cvtsd2si %xmm0, %rax
@@ -472,6 +864,39 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
 declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
 
 define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
+; X86-LABEL: llrint_v2i64_v2f64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    .cfi_offset %esi, -16
+; X86-NEXT:    .cfi_offset %edi, -12
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    fldl 20(%ebp)
+; X86-NEXT:    fldl 12(%ebp)
+; X86-NEXT:    fistpll (%esp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    movl (%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    movl %esi, 8(%eax)
+; X86-NEXT:    movl %edx, 4(%eax)
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    leal -8(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl $4
+;
 ; SSE-LABEL: llrint_v2i64_v2f64:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cvtsd2si %xmm0, %rax
@@ -503,6 +928,60 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
 declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
 
 define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
+; X86-LABEL: llrint_v4i64_v4f64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $56, %esp
+; X86-NEXT:    .cfi_offset %esi, -20
+; X86-NEXT:    .cfi_offset %edi, -16
+; X86-NEXT:    .cfi_offset %ebx, -12
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    fldl 36(%ebp)
+; X86-NEXT:    fldl 28(%ebp)
+; X86-NEXT:    fldl 20(%ebp)
+; X86-NEXT:    fldl 12(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, 28(%eax)
+; X86-NEXT:    movl %ecx, 24(%eax)
+; X86-NEXT:    movl %edx, 20(%eax)
+; X86-NEXT:    movl %ebx, 16(%eax)
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 8(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl $4
+;
 ; SSE-LABEL: llrint_v4i64_v4f64:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cvtsd2si %xmm0, %rax
@@ -567,6 +1046,100 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
 declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
 
 define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
+; X86-LABEL: llrint_v8i64_v8f64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $120, %esp
+; X86-NEXT:    .cfi_offset %esi, -20
+; X86-NEXT:    .cfi_offset %edi, -16
+; X86-NEXT:    .cfi_offset %ebx, -12
+; X86-NEXT:    fldl 12(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    fldl 20(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    fldl 28(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    fldl 36(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    fldl 44(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    fldl 52(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    fldl 60(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    fldl 68(%ebp)
+; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, 60(%eax)
+; X86-NEXT:    movl %ecx, 56(%eax)
+; X86-NEXT:    movl %edx, 52(%eax)
+; X86-NEXT:    movl %esi, 48(%eax)
+; X86-NEXT:    movl %edi, 44(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 40(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 36(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 32(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 28(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 24(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 20(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 16(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 8(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl $4
+;
 ; SSE-LABEL: llrint_v8i64_v8f64:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cvtsd2si %xmm0, %rax
diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll
index b1c8d46f497f3..a4c50e539d661 100644
--- a/llvm/test/CodeGen/X86/vector-lrint.ll
+++ b/llvm/test/CodeGen/X86/vector-lrint.ll
@@ -1,4 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown | FileCheck %s --check-prefix=X86-I32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=i686-unknown | FileCheck %s --check-prefix=X86-I64
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=X86-SSE2
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86-AVX,AVX512-i32
@@ -11,6 +13,35 @@
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=X64-AVX-i64,AVX512DQ-i64
 
 define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
+; X86-I32-LABEL: lrint_v1f32:
+; X86-I32:       # %bb.0:
+; X86-I32-NEXT:    pushl %eax
+; X86-I32-NEXT:    .cfi_def_cfa_offset 8
+; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl (%esp)
+; X86-I32-NEXT:    movl (%esp), %eax
+; X86-I32-NEXT:    popl %ecx
+; X86-I32-NEXT:    .cfi_def_cfa_offset 4
+; X86-I32-NEXT:    retl
+;
+; X86-I64-LABEL: lrint_v1f32:
+; X86-I64:       # %bb.0:
+; X86-I64-NEXT:    pushl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_offset 8
+; X86-I64-NEXT:    .cfi_offset %ebp, -8
+; X86-I64-NEXT:    movl %esp, %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I64-NEXT:    andl $-8, %esp
+; X86-I64-NEXT:    subl $8, %esp
+; X86-I64-NEXT:    flds 8(%ebp)
+; X86-I64-NEXT:    fistpll (%esp)
+; X86-I64-NEXT:    movl (%esp), %eax
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT:    movl %ebp, %esp
+; X86-I64-NEXT:    popl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I64-NEXT:    retl
+;
 ; X86-SSE2-LABEL: lrint_v1f32:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    cvtss2si {{[0-9]+}}(%esp), %eax
@@ -36,6 +67,53 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>)
 
 define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
+; X86-I32-LABEL: lrint_v2f32:
+; X86-I32:       # %bb.0:
+; X86-I32-NEXT:    subl $8, %esp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 12
+; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl (%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    movl (%esp), %eax
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-I32-NEXT:    addl $8, %esp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 4
+; X86-I32-NEXT:    retl
+;
+; X86-I64-LABEL: lrint_v2f32:
+; X86-I64:       # %bb.0:
+; X86-I64-NEXT:    pushl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_offset 8
+; X86-I64-NEXT:    .cfi_offset %ebp, -8
+; X86-I64-NEXT:    movl %esp, %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I64-NEXT:    pushl %edi
+; X86-I64-NEXT:    pushl %esi
+; X86-I64-NEXT:    andl $-8, %esp
+; X86-I64-NEXT:    subl $16, %esp
+; X86-I64-NEXT:    .cfi_offset %esi, -16
+; X86-I64-NEXT:    .cfi_offset %edi, -12
+; X86-I64-NEXT:    movl 8(%ebp), %eax
+; X86-I64-NEXT:    flds 16(%ebp)
+; X86-I64-NEXT:    flds 12(%ebp)
+; X86-I64-NEXT:    fistpll (%esp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    movl (%esp), %ecx
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-I64-NEXT:    movl %edi, 12(%eax)
+; X86-I64-NEXT:    movl %esi, 8(%eax)
+; X86-I64-NEXT:    movl %edx, 4(%eax)
+; X86-I64-NEXT:    movl %ecx, (%eax)
+; X86-I64-NEXT:    leal -8(%ebp), %esp
+; X86-I64-NEXT:    popl %esi
+; X86-I64-NEXT:    popl %edi
+; X86-I64-NEXT:    popl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I64-NEXT:    retl $4
+;
 ; X86-SSE2-LABEL: lrint_v2f32:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    cvtps2dq %xmm0, %xmm0
@@ -81,6 +159,95 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
 declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>)
 
 define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
+; X86-I32-LABEL: lrint_v4f32:
+; X86-I32:       # %bb.0:
+; X86-I32-NEXT:    pushl %edi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 8
+; X86-I32-NEXT:    pushl %esi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 12
+; X86-I32-NEXT:    subl $16, %esp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 28
+; X86-I32-NEXT:    .cfi_offset %esi, -12
+; X86-I32-NEXT:    .cfi_offset %edi, -8
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl (%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    movl (%esp), %ecx
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-I32-NEXT:    movl %edi, 12(%eax)
+; X86-I32-NEXT:    movl %esi, 8(%eax)
+; X86-I32-NEXT:    movl %edx, 4(%eax)
+; X86-I32-NEXT:    movl %ecx, (%eax)
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 12
+; X86-I32-NEXT:    popl %esi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 8
+; X86-I32-NEXT:    popl %edi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 4
+; X86-I32-NEXT:    retl $4
+;
+; X86-I64-LABEL: lrint_v4f32:
+; X86-I64:       # %bb.0:
+; X86-I64-NEXT:    pushl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_offset 8
+; X86-I64-NEXT:    .cfi_offset %ebp, -8
+; X86-I64-NEXT:    movl %esp, %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I64-NEXT:    pushl %ebx
+; X86-I64-NEXT:    pushl %edi
+; X86-I64-NEXT:    pushl %esi
+; X86-I64-NEXT:    andl $-8, %esp
+; X86-I64-NEXT:    subl $56, %esp
+; X86-I64-NEXT:    .cfi_offset %esi, -20
+; X86-I64-NEXT:    .cfi_offset %edi, -16
+; X86-I64-NEXT:    .cfi_offset %ebx, -12
+; X86-I64-NEXT:    movl 8(%ebp), %eax
+; X86-I64-NEXT:    flds 24(%ebp)
+; X86-I64-NEXT:    flds 20(%ebp)
+; X86-I64-NEXT:    flds 16(%ebp)
+; X86-I64-NEXT:    flds 12(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-I64-NEXT:    movl %esi, 28(%eax)
+; X86-I64-NEXT:    movl %ecx, 24(%eax)
+; X86-I64-NEXT:    movl %edx, 20(%eax)
+; X86-I64-NEXT:    movl %ebx, 16(%eax)
+; X86-I64-NEXT:    movl %edi, 12(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 8(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 4(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, (%eax)
+; X86-I64-NEXT:    leal -12(%ebp), %esp
+; X86-I64-NEXT:    popl %esi
+; X86-I64-NEXT:    popl %edi
+; X86-I64-NEXT:    popl %ebx
+; X86-I64-NEXT:    popl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I64-NEXT:    retl $4
+;
 ; X86-SSE2-LABEL: lrint_v4f32:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    cvtps2dq %xmm0, %xmm0
@@ -142,6 +309,165 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
 declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>)
 
 define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
+; X86-I32-LABEL: lrint_v8f32:
+; X86-I32:       # %bb.0:
+; X86-I32-NEXT:    pushl %ebp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 8
+; X86-I32-NEXT:    pushl %ebx
+; X86-I32-NEXT:    .cfi_def_cfa_offset 12
+; X86-I32-NEXT:    pushl %edi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 16
+; X86-I32-NEXT:    pushl %esi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 20
+; X86-I32-NEXT:    subl $40, %esp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 60
+; X86-I32-NEXT:    .cfi_offset %esi, -20
+; X86-I32-NEXT:    .cfi_offset %edi, -16
+; X86-I32-NEXT:    .cfi_offset %ebx, -12
+; X86-I32-NEXT:    .cfi_offset %ebp, -8
+; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I32-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-I32-NEXT:    movl %edx, 28(%eax)
+; X86-I32-NEXT:    movl %ecx, 24(%eax)
+; X86-I32-NEXT:    movl %ebp, 20(%eax)
+; X86-I32-NEXT:    movl %ebx, 16(%eax)
+; X86-I32-NEXT:    movl %edi, 12(%eax)
+; X86-I32-NEXT:    movl %esi, 8(%eax)
+; X86-I32-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-I32-NEXT:    movl %ecx, 4(%eax)
+; X86-I32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I32-NEXT:    movl %ecx, (%eax)
+; X86-I32-NEXT:    addl $40, %esp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 20
+; X86-I32-NEXT:    popl %esi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 16
+; X86-I32-NEXT:    popl %edi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 12
+; X86-I32-NEXT:    popl %ebx
+; X86-I32-NEXT:    .cfi_def_cfa_offset 8
+; X86-I32-NEXT:    popl %ebp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 4
+; X86-I32-NEXT:    retl $4
+;
+; X86-I64-LABEL: lrint_v8f32:
+; X86-I64:       # %bb.0:
+; X86-I64-NEXT:    pushl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_offset 8
+; X86-I64-NEXT:    .cfi_offset %ebp, -8
+; X86-I64-NEXT:    movl %esp, %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I64-NEXT:    pushl %ebx
+; X86-I64-NEXT:    pushl %edi
+; X86-I64-NEXT:    pushl %esi
+; X86-I64-NEXT:    andl $-8, %esp
+; X86-I64-NEXT:    subl $120, %esp
+; X86-I64-NEXT:    .cfi_offset %esi, -20
+; X86-I64-NEXT:    .cfi_offset %edi, -16
+; X86-I64-NEXT:    .cfi_offset %ebx, -12
+; X86-I64-NEXT:    flds 12(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    flds 16(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    flds 20(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    flds 24(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    flds 28(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    flds 32(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    flds 36(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    flds 40(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    movl 8(%ebp), %eax
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-I64-NEXT:    movl %ebx, 60(%eax)
+; X86-I64-NEXT:    movl %ecx, 56(%eax)
+; X86-I64-NEXT:    movl %edx, 52(%eax)
+; X86-I64-NEXT:    movl %esi, 48(%eax)
+; X86-I64-NEXT:    movl %edi, 44(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 40(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 36(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 32(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 28(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 24(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 20(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 16(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 12(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 8(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 4(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, (%eax)
+; X86-I64-NEXT:    leal -12(%ebp), %esp
+; X86-I64-NEXT:    popl %esi
+; X86-I64-NEXT:    popl %edi
+; X86-I64-NEXT:    popl %ebx
+; X86-I64-NEXT:    popl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I64-NEXT:    retl $4
+;
 ; X86-SSE2-LABEL: lrint_v8f32:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    cvtps2dq %xmm0, %xmm0
@@ -242,6 +568,35 @@ define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) {
 declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
 
 define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
+; X86-I32-LABEL: lrint_v1f64:
+; X86-I32:       # %bb.0:
+; X86-I32-NEXT:    pushl %eax
+; X86-I32-NEXT:    .cfi_def_cfa_offset 8
+; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl (%esp)
+; X86-I32-NEXT:    movl (%esp), %eax
+; X86-I32-NEXT:    popl %ecx
+; X86-I32-NEXT:    .cfi_def_cfa_offset 4
+; X86-I32-NEXT:    retl
+;
+; X86-I64-LABEL: lrint_v1f64:
+; X86-I64:       # %bb.0:
+; X86-I64-NEXT:    pushl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_offset 8
+; X86-I64-NEXT:    .cfi_offset %ebp, -8
+; X86-I64-NEXT:    movl %esp, %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I64-NEXT:    andl $-8, %esp
+; X86-I64-NEXT:    subl $8, %esp
+; X86-I64-NEXT:    fldl 8(%ebp)
+; X86-I64-NEXT:    fistpll (%esp)
+; X86-I64-NEXT:    movl (%esp), %eax
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT:    movl %ebp, %esp
+; X86-I64-NEXT:    popl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I64-NEXT:    retl
+;
 ; X86-SSE2-LABEL: lrint_v1f64:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    cvtsd2si {{[0-9]+}}(%esp), %eax
@@ -267,6 +622,53 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
 declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
 
 define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
+; X86-I32-LABEL: lrint_v2f64:
+; X86-I32:       # %bb.0:
+; X86-I32-NEXT:    subl $8, %esp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 12
+; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl (%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    movl (%esp), %eax
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-I32-NEXT:    addl $8, %esp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 4
+; X86-I32-NEXT:    retl
+;
+; X86-I64-LABEL: lrint_v2f64:
+; X86-I64:       # %bb.0:
+; X86-I64-NEXT:    pushl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_offset 8
+; X86-I64-NEXT:    .cfi_offset %ebp, -8
+; X86-I64-NEXT:    movl %esp, %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I64-NEXT:    pushl %edi
+; X86-I64-NEXT:    pushl %esi
+; X86-I64-NEXT:    andl $-8, %esp
+; X86-I64-NEXT:    subl $16, %esp
+; X86-I64-NEXT:    .cfi_offset %esi, -16
+; X86-I64-NEXT:    .cfi_offset %edi, -12
+; X86-I64-NEXT:    movl 8(%ebp), %eax
+; X86-I64-NEXT:    fldl 20(%ebp)
+; X86-I64-NEXT:    fldl 12(%ebp)
+; X86-I64-NEXT:    fistpll (%esp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    movl (%esp), %ecx
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-I64-NEXT:    movl %edi, 12(%eax)
+; X86-I64-NEXT:    movl %esi, 8(%eax)
+; X86-I64-NEXT:    movl %edx, 4(%eax)
+; X86-I64-NEXT:    movl %ecx, (%eax)
+; X86-I64-NEXT:    leal -8(%ebp), %esp
+; X86-I64-NEXT:    popl %esi
+; X86-I64-NEXT:    popl %edi
+; X86-I64-NEXT:    popl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I64-NEXT:    retl $4
+;
 ; X86-SSE2-LABEL: lrint_v2f64:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    cvtpd2dq %xmm0, %xmm0
@@ -312,6 +714,95 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
 declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
 
 define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
+; X86-I32-LABEL: lrint_v4f64:
+; X86-I32:       # %bb.0:
+; X86-I32-NEXT:    pushl %edi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 8
+; X86-I32-NEXT:    pushl %esi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 12
+; X86-I32-NEXT:    subl $16, %esp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 28
+; X86-I32-NEXT:    .cfi_offset %esi, -12
+; X86-I32-NEXT:    .cfi_offset %edi, -8
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl (%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    movl (%esp), %ecx
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-I32-NEXT:    movl %edi, 12(%eax)
+; X86-I32-NEXT:    movl %esi, 8(%eax)
+; X86-I32-NEXT:    movl %edx, 4(%eax)
+; X86-I32-NEXT:    movl %ecx, (%eax)
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 12
+; X86-I32-NEXT:    popl %esi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 8
+; X86-I32-NEXT:    popl %edi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 4
+; X86-I32-NEXT:    retl $4
+;
+; X86-I64-LABEL: lrint_v4f64:
+; X86-I64:       # %bb.0:
+; X86-I64-NEXT:    pushl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_offset 8
+; X86-I64-NEXT:    .cfi_offset %ebp, -8
+; X86-I64-NEXT:    movl %esp, %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I64-NEXT:    pushl %ebx
+; X86-I64-NEXT:    pushl %edi
+; X86-I64-NEXT:    pushl %esi
+; X86-I64-NEXT:    andl $-8, %esp
+; X86-I64-NEXT:    subl $56, %esp
+; X86-I64-NEXT:    .cfi_offset %esi, -20
+; X86-I64-NEXT:    .cfi_offset %edi, -16
+; X86-I64-NEXT:    .cfi_offset %ebx, -12
+; X86-I64-NEXT:    movl 8(%ebp), %eax
+; X86-I64-NEXT:    fldl 36(%ebp)
+; X86-I64-NEXT:    fldl 28(%ebp)
+; X86-I64-NEXT:    fldl 20(%ebp)
+; X86-I64-NEXT:    fldl 12(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-I64-NEXT:    movl %esi, 28(%eax)
+; X86-I64-NEXT:    movl %ecx, 24(%eax)
+; X86-I64-NEXT:    movl %edx, 20(%eax)
+; X86-I64-NEXT:    movl %ebx, 16(%eax)
+; X86-I64-NEXT:    movl %edi, 12(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 8(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 4(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, (%eax)
+; X86-I64-NEXT:    leal -12(%ebp), %esp
+; X86-I64-NEXT:    popl %esi
+; X86-I64-NEXT:    popl %edi
+; X86-I64-NEXT:    popl %ebx
+; X86-I64-NEXT:    popl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I64-NEXT:    retl $4
+;
 ; X86-SSE2-LABEL: lrint_v4f64:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    cvtpd2dq %xmm1, %xmm1
@@ -377,6 +868,165 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
 declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)
 
 define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
+; X86-I32-LABEL: lrint_v8f64:
+; X86-I32:       # %bb.0:
+; X86-I32-NEXT:    pushl %ebp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 8
+; X86-I32-NEXT:    pushl %ebx
+; X86-I32-NEXT:    .cfi_def_cfa_offset 12
+; X86-I32-NEXT:    pushl %edi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 16
+; X86-I32-NEXT:    pushl %esi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 20
+; X86-I32-NEXT:    subl $40, %esp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 60
+; X86-I32-NEXT:    .cfi_offset %esi, -20
+; X86-I32-NEXT:    .cfi_offset %edi, -16
+; X86-I32-NEXT:    .cfi_offset %ebx, -12
+; X86-I32-NEXT:    .cfi_offset %ebp, -8
+; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I32-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-I32-NEXT:    movl %edx, 28(%eax)
+; X86-I32-NEXT:    movl %ecx, 24(%eax)
+; X86-I32-NEXT:    movl %ebp, 20(%eax)
+; X86-I32-NEXT:    movl %ebx, 16(%eax)
+; X86-I32-NEXT:    movl %edi, 12(%eax)
+; X86-I32-NEXT:    movl %esi, 8(%eax)
+; X86-I32-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-I32-NEXT:    movl %ecx, 4(%eax)
+; X86-I32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I32-NEXT:    movl %ecx, (%eax)
+; X86-I32-NEXT:    addl $40, %esp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 20
+; X86-I32-NEXT:    popl %esi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 16
+; X86-I32-NEXT:    popl %edi
+; X86-I32-NEXT:    .cfi_def_cfa_offset 12
+; X86-I32-NEXT:    popl %ebx
+; X86-I32-NEXT:    .cfi_def_cfa_offset 8
+; X86-I32-NEXT:    popl %ebp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 4
+; X86-I32-NEXT:    retl $4
+;
+; X86-I64-LABEL: lrint_v8f64:
+; X86-I64:       # %bb.0:
+; X86-I64-NEXT:    pushl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_offset 8
+; X86-I64-NEXT:    .cfi_offset %ebp, -8
+; X86-I64-NEXT:    movl %esp, %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I64-NEXT:    pushl %ebx
+; X86-I64-NEXT:    pushl %edi
+; X86-I64-NEXT:    pushl %esi
+; X86-I64-NEXT:    andl $-8, %esp
+; X86-I64-NEXT:    subl $120, %esp
+; X86-I64-NEXT:    .cfi_offset %esi, -20
+; X86-I64-NEXT:    .cfi_offset %edi, -16
+; X86-I64-NEXT:    .cfi_offset %ebx, -12
+; X86-I64-NEXT:    fldl 12(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    fldl 20(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    fldl 28(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    fldl 36(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    fldl 44(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    fldl 52(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    fldl 60(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    fldl 68(%ebp)
+; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
+; X86-I64-NEXT:    movl 8(%ebp), %eax
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-I64-NEXT:    movl %ebx, 60(%eax)
+; X86-I64-NEXT:    movl %ecx, 56(%eax)
+; X86-I64-NEXT:    movl %edx, 52(%eax)
+; X86-I64-NEXT:    movl %esi, 48(%eax)
+; X86-I64-NEXT:    movl %edi, 44(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 40(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 36(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 32(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 28(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 24(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 20(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 16(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 12(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 8(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, 4(%eax)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-I64-NEXT:    movl %ecx, (%eax)
+; X86-I64-NEXT:    leal -12(%ebp), %esp
+; X86-I64-NEXT:    popl %esi
+; X86-I64-NEXT:    popl %edi
+; X86-I64-NEXT:    popl %ebx
+; X86-I64-NEXT:    popl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I64-NEXT:    retl $4
+;
 ; X86-SSE2-LABEL: lrint_v8f64:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    pushl %ebp

>From f834954c1a12aa1c0af63e32d9b02ba11000027a Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 8 Aug 2025 06:48:37 -0500
Subject: [PATCH 3/7] Add f128 to vector tests

---
 .../AArch64/sve-fixed-vector-llrint.ll        |  654 +++++
 .../CodeGen/AArch64/sve-fixed-vector-lrint.ll | 1115 ++++++++
 llvm/test/CodeGen/AArch64/vector-llrint.ll    |  516 ++++
 llvm/test/CodeGen/AArch64/vector-lrint.ll     |  948 +++++++
 llvm/test/CodeGen/PowerPC/vector-llrint.ll    |  596 +++++
 llvm/test/CodeGen/PowerPC/vector-lrint.ll     | 2324 +++++++++++++++++
 llvm/test/CodeGen/X86/vector-llrint.ll        |  705 +++++
 llvm/test/CodeGen/X86/vector-lrint.ll         | 1271 +++++++++
 8 files changed, 8129 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
index 7f144df499be0..838aac0edcb73 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
@@ -861,3 +861,657 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) {
   ret <32 x i64> %a
 }
 declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>)
+
+define <1 x i64> @llrint_v1i64_v1fp128(<1 x fp128> %x) {
+; CHECK-LABEL: llrint_v1i64_v1fp128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %a = call <1 x i64> @llvm.llrint.v1i64.v1fp128(<1 x fp128> %x)
+  ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1fp128(<1 x fp128>)
+
+define <2 x i64> @llrint_v2i64_v2fp128(<2 x fp128> %x) {
+; CHECK-LABEL: llrint_v2i64_v2fp128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
+  %a = call <2 x i64> @llvm.llrint.v2i64.v2fp128(<2 x fp128> %x)
+  ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2fp128(<2 x fp128>)
+
+define <4 x i64> @llrint_v4i64_v4fp128(<4 x fp128> %x) {
+; CHECK-LABEL: llrint_v4i64_v4fp128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 80 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    mov v0.16b, v3.16b
+; CHECK-NEXT:    stp q2, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #64
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #64
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %a = call <4 x i64> @llvm.llrint.v4i64.v4fp128(<4 x fp128> %x)
+  ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4fp128(<4 x fp128>)
+
+define <8 x i64> @llrint_v8i64_v8fp128(<8 x fp128> %x) {
+; CHECK-LABEL: llrint_v8i64_v8fp128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #128
+; CHECK-NEXT:    addvl sp, sp, #-2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    mov v0.16b, v7.16b
+; CHECK-NEXT:    stp q6, q5, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT:    stp q4, q3, [sp, #48] // 32-byte Folded Spill
+; CHECK-NEXT:    stp q2, q1, [sp, #80] // 32-byte Folded Spill
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #128
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #128
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #128
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #128
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ext z3.b, z3.b, z2.b, #16
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT:    addvl sp, sp, #2
+; CHECK-NEXT:    add sp, sp, #128
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %a = call <8 x i64> @llvm.llrint.v8i64.v8fp128(<8 x fp128> %x)
+  ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8fp128(<8 x fp128>)
+
+define <16 x i64> @llrint_v16fp128(<16 x fp128> %x) {
+; CHECK-LABEL: llrint_v16fp128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #256
+; CHECK-NEXT:    addvl sp, sp, #-4
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x02, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 272 + 32 * VG
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    addvl x8, sp, #4
+; CHECK-NEXT:    str q1, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q1, [x8, #272]
+; CHECK-NEXT:    addvl x8, sp, #4
+; CHECK-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT:    stp q7, q6, [sp, #128] // 32-byte Folded Spill
+; CHECK-NEXT:    str q1, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q1, [x8, #288]
+; CHECK-NEXT:    addvl x8, sp, #4
+; CHECK-NEXT:    stp q5, q4, [sp, #160] // 32-byte Folded Spill
+; CHECK-NEXT:    str q1, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q1, [x8, #304]
+; CHECK-NEXT:    addvl x8, sp, #4
+; CHECK-NEXT:    stp q3, q2, [sp, #192] // 32-byte Folded Spill
+; CHECK-NEXT:    str q1, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q1, [x8, #320]
+; CHECK-NEXT:    addvl x8, sp, #4
+; CHECK-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q1, [x8, #336]
+; CHECK-NEXT:    addvl x8, sp, #4
+; CHECK-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q1, [x8, #352]
+; CHECK-NEXT:    addvl x8, sp, #4
+; CHECK-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q1, [x8, #368]
+; CHECK-NEXT:    addvl x8, sp, #4
+; CHECK-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q1, [x8, #384]
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #256
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #256
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #256
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #256
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #256
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #256
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #256
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #256
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z4, [x8, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr z6, [x8, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z5.d, z4.d
+; CHECK-NEXT:    mov z7.d, z6.d
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ext z3.b, z3.b, z2.b, #16
+; CHECK-NEXT:    ext z5.b, z5.b, z4.b, #16
+; CHECK-NEXT:    ext z7.b, z7.b, z6.b, #16
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT:    // kill: def $q4 killed $q4 killed $z4
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT:    // kill: def $q5 killed $q5 killed $z5
+; CHECK-NEXT:    // kill: def $q6 killed $q6 killed $z6
+; CHECK-NEXT:    // kill: def $q7 killed $q7 killed $z7
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT:    addvl sp, sp, #4
+; CHECK-NEXT:    add sp, sp, #256
+; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  %a = call <16 x i64> @llvm.llrint.v16i64.v16fp128(<16 x fp128> %x)
+  ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16fp128(<16 x fp128>)
+
+define <32 x i64> @llrint_v32fp128(<32 x fp128> %x) {
+; CHECK-LABEL: llrint_v32fp128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #512
+; CHECK-NEXT:    addvl sp, sp, #-8
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xa0, 0x04, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 544 + 64 * VG
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    .cfi_offset w29, -32
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    stp q2, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT:    mov x19, x8
+; CHECK-NEXT:    stp q0, q7, [sp, #48] // 32-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #864]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q6, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #880]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    stp q5, q4, [sp, #128] // 32-byte Folded Spill
+; CHECK-NEXT:    str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #896]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #912]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #800]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #816]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #832]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #848]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #736]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #368] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #752]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #768]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #336] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #784]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #320] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #672]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #304] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #688]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #704]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #720]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #608]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #624]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #640]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #656]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #544]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #560]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #576]
+; CHECK-NEXT:    addvl x9, sp, #8
+; CHECK-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [x9, #592]
+; CHECK-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    mov v0.16b, v3.16b
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str z0, [x8, #7, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr z1, [x8, #7, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    str z0, [x8, #7, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str z0, [x8, #6, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr z1, [x8, #6, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    str z0, [x8, #6, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str z0, [x8, #5, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr z1, [x8, #5, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    str z0, [x8, #5, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str z0, [x8, #4, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr z1, [x8, #4, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    str z0, [x8, #4, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #256] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #256] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #288] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #304] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #288] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #320] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #336] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #320] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #352] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #352] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #384] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #416] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #448] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #480] // 16-byte Folded Reload
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    add x9, sp, #512
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-NEXT:    mov x8, #28 // =0x1c
+; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-NEXT:    mov x8, #24 // =0x18
+; CHECK-NEXT:    ldr z0, [x9, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-NEXT:    mov x8, #20 // =0x14
+; CHECK-NEXT:    ldr z0, [x9, #2, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-NEXT:    mov x8, #16 // =0x10
+; CHECK-NEXT:    ldr z0, [x9, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-NEXT:    mov x8, #12 // =0xc
+; CHECK-NEXT:    ldr z0, [x9, #4, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-NEXT:    mov x8, #8 // =0x8
+; CHECK-NEXT:    ldr z0, [x9, #5, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-NEXT:    mov x8, #4 // =0x4
+; CHECK-NEXT:    ldr z0, [x9, #6, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-NEXT:    add x8, sp, #512
+; CHECK-NEXT:    ldr z0, [x8, #7, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
+; CHECK-NEXT:    addvl sp, sp, #8
+; CHECK-NEXT:    add sp, sp, #512
+; CHECK-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x29, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %a = call <32 x i64> @llvm.llrint.v32i64.v32fp128(<32 x fp128> %x)
+  ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.llrint.v32i64.v32fp128(<32 x fp128>)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
index 9fe8d92a182ac..0b5e27f9fe15d 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
@@ -1611,3 +1611,1118 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
   ret <32 x iXLen> %a
 }
 declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>)
+
+define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
+; CHECK-i32-LABEL: lrint_v1fp128:
+; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-i32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i32-NEXT:    .cfi_offset w30, -16
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-i32-NEXT:    ret
+;
+; CHECK-i64-LABEL: lrint_v1fp128:
+; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-i64-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i64-NEXT:    .cfi_offset w30, -16
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-i64-NEXT:    ret
+  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x)
+  ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>)
+
+define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
+; CHECK-i32-LABEL: lrint_v2fp128:
+; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    sub sp, sp, #48
+; CHECK-i32-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-i32-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-i32-NEXT:    .cfi_offset w30, -16
+; CHECK-i32-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-i32-NEXT:    add sp, sp, #48
+; CHECK-i32-NEXT:    ret
+;
+; CHECK-i64-LABEL: lrint_v2fp128:
+; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    sub sp, sp, #48
+; CHECK-i64-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-i64-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-i64-NEXT:    .cfi_offset w30, -16
+; CHECK-i64-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    mov v0.16b, v1.16b
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    add sp, sp, #48
+; CHECK-i64-NEXT:    ret
+  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x)
+  ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>)
+
+define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
+; CHECK-i32-LABEL: lrint_v4fp128:
+; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    sub sp, sp, #80
+; CHECK-i32-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-i32-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-i32-NEXT:    .cfi_offset w30, -16
+; CHECK-i32-NEXT:    stp q2, q3, [sp, #16] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w0
+; CHECK-i32-NEXT:    add sp, sp, #80
+; CHECK-i32-NEXT:    ret
+;
+; CHECK-i64-LABEL: lrint_v4fp128:
+; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-i64-NEXT:    sub sp, sp, #64
+; CHECK-i64-NEXT:    addvl sp, sp, #-1
+; CHECK-i64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 80 + 8 * VG
+; CHECK-i64-NEXT:    .cfi_offset w30, -8
+; CHECK-i64-NEXT:    .cfi_offset w29, -16
+; CHECK-i64-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    mov v0.16b, v3.16b
+; CHECK-i64-NEXT:    stp q2, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #64
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #64
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT:    mov z1.d, z0.d
+; CHECK-i64-NEXT:    ext z1.b, z1.b, z0.b, #16
+; CHECK-i64-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-i64-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; CHECK-i64-NEXT:    addvl sp, sp, #1
+; CHECK-i64-NEXT:    add sp, sp, #64
+; CHECK-i64-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-i64-NEXT:    ret
+  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x)
+  ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>)
+
+define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
+; CHECK-i32-LABEL: lrint_v8fp128:
+; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    sub sp, sp, #176
+; CHECK-i32-NEXT:    stp x30, x25, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    .cfi_def_cfa_offset 176
+; CHECK-i32-NEXT:    .cfi_offset w19, -8
+; CHECK-i32-NEXT:    .cfi_offset w20, -16
+; CHECK-i32-NEXT:    .cfi_offset w21, -24
+; CHECK-i32-NEXT:    .cfi_offset w22, -32
+; CHECK-i32-NEXT:    .cfi_offset w23, -40
+; CHECK-i32-NEXT:    .cfi_offset w24, -48
+; CHECK-i32-NEXT:    .cfi_offset w25, -56
+; CHECK-i32-NEXT:    .cfi_offset w30, -64
+; CHECK-i32-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    mov v0.16b, v7.16b
+; CHECK-i32-NEXT:    stp q6, q5, [sp] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    stp q4, q3, [sp, #32] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    stp q2, q1, [sp, #64] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w19, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w20, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w21, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w22, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w23, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w24, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w25, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s1, w22
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    mov v0.s[1], w25
+; CHECK-i32-NEXT:    mov v1.s[1], w21
+; CHECK-i32-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ldp x30, x25, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w24
+; CHECK-i32-NEXT:    mov v1.s[2], w20
+; CHECK-i32-NEXT:    mov v0.s[3], w23
+; CHECK-i32-NEXT:    mov v1.s[3], w19
+; CHECK-i32-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    add sp, sp, #176
+; CHECK-i32-NEXT:    ret
+;
+; CHECK-i64-LABEL: lrint_v8fp128:
+; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-i64-NEXT:    sub sp, sp, #128
+; CHECK-i64-NEXT:    addvl sp, sp, #-2
+; CHECK-i64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 16 * VG
+; CHECK-i64-NEXT:    .cfi_offset w30, -8
+; CHECK-i64-NEXT:    .cfi_offset w29, -16
+; CHECK-i64-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    mov v0.16b, v7.16b
+; CHECK-i64-NEXT:    stp q6, q5, [sp, #16] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    stp q4, q3, [sp, #48] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    stp q2, q1, [sp, #80] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #128
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #128
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #128
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #128
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov z3.d, z2.d
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT:    ext z3.b, z3.b, z2.b, #16
+; CHECK-i64-NEXT:    // kill: def $q2 killed $q2 killed $z2
+; CHECK-i64-NEXT:    // kill: def $q3 killed $q3 killed $z3
+; CHECK-i64-NEXT:    mov z1.d, z0.d
+; CHECK-i64-NEXT:    ext z1.b, z1.b, z0.b, #16
+; CHECK-i64-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-i64-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; CHECK-i64-NEXT:    addvl sp, sp, #2
+; CHECK-i64-NEXT:    add sp, sp, #128
+; CHECK-i64-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-i64-NEXT:    ret
+  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x)
+  ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>)
+
+define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) {
+; CHECK-i32-LABEL: lrint_v16fp128:
+; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    sub sp, sp, #368
+; CHECK-i32-NEXT:    stp x29, x30, [sp, #272] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x28, x27, [sp, #288] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x26, x25, [sp, #304] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x24, x23, [sp, #320] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x22, x21, [sp, #336] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x20, x19, [sp, #352] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    .cfi_def_cfa_offset 368
+; CHECK-i32-NEXT:    .cfi_offset w19, -8
+; CHECK-i32-NEXT:    .cfi_offset w20, -16
+; CHECK-i32-NEXT:    .cfi_offset w21, -24
+; CHECK-i32-NEXT:    .cfi_offset w22, -32
+; CHECK-i32-NEXT:    .cfi_offset w23, -40
+; CHECK-i32-NEXT:    .cfi_offset w24, -48
+; CHECK-i32-NEXT:    .cfi_offset w25, -56
+; CHECK-i32-NEXT:    .cfi_offset w26, -64
+; CHECK-i32-NEXT:    .cfi_offset w27, -72
+; CHECK-i32-NEXT:    .cfi_offset w28, -80
+; CHECK-i32-NEXT:    .cfi_offset w30, -88
+; CHECK-i32-NEXT:    .cfi_offset w29, -96
+; CHECK-i32-NEXT:    stp q7, q6, [sp, #80] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    stp q5, q4, [sp, #112] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    stp q3, q0, [sp, #144] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    stp q2, q1, [sp, #176] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #368]
+; CHECK-i32-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #384]
+; CHECK-i32-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #400]
+; CHECK-i32-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #416]
+; CHECK-i32-NEXT:    str q1, [sp, #208] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #432]
+; CHECK-i32-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #448]
+; CHECK-i32-NEXT:    str q1, [sp, #224] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #464]
+; CHECK-i32-NEXT:    str q1, [sp, #240] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #480]
+; CHECK-i32-NEXT:    mov v0.16b, v1.16b
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #268] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #240] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #224] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w23, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #208] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w24, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w25, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w27, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w26, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w28, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w29, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w19, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w20, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w21, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w22, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s1, w19
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    ldr w8, [sp, #224] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    fmov s2, w27
+; CHECK-i32-NEXT:    fmov s3, w23
+; CHECK-i32-NEXT:    mov v0.s[1], w22
+; CHECK-i32-NEXT:    mov v1.s[1], w29
+; CHECK-i32-NEXT:    mov v2.s[1], w25
+; CHECK-i32-NEXT:    mov v3.s[1], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #240] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    ldp x29, x30, [sp, #272] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w21
+; CHECK-i32-NEXT:    mov v1.s[2], w28
+; CHECK-i32-NEXT:    mov v2.s[2], w24
+; CHECK-i32-NEXT:    mov v3.s[2], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #208] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    ldp x22, x21, [sp, #336] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ldp x24, x23, [sp, #320] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w20
+; CHECK-i32-NEXT:    mov v1.s[3], w26
+; CHECK-i32-NEXT:    mov v2.s[3], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #268] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    ldp x20, x19, [sp, #352] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ldp x26, x25, [sp, #304] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v3.s[3], w8
+; CHECK-i32-NEXT:    ldp x28, x27, [sp, #288] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    add sp, sp, #368
+; CHECK-i32-NEXT:    ret
+;
+; CHECK-i64-LABEL: lrint_v16fp128:
+; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-i64-NEXT:    sub sp, sp, #256
+; CHECK-i64-NEXT:    addvl sp, sp, #-4
+; CHECK-i64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x02, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 272 + 32 * VG
+; CHECK-i64-NEXT:    .cfi_offset w30, -8
+; CHECK-i64-NEXT:    .cfi_offset w29, -16
+; CHECK-i64-NEXT:    addvl x8, sp, #4
+; CHECK-i64-NEXT:    str q1, [sp, #240] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q1, [x8, #272]
+; CHECK-i64-NEXT:    addvl x8, sp, #4
+; CHECK-i64-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    stp q7, q6, [sp, #128] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    str q1, [sp, #112] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q1, [x8, #288]
+; CHECK-i64-NEXT:    addvl x8, sp, #4
+; CHECK-i64-NEXT:    stp q5, q4, [sp, #160] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    str q1, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q1, [x8, #304]
+; CHECK-i64-NEXT:    addvl x8, sp, #4
+; CHECK-i64-NEXT:    stp q3, q2, [sp, #192] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    str q1, [sp, #80] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q1, [x8, #320]
+; CHECK-i64-NEXT:    addvl x8, sp, #4
+; CHECK-i64-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q1, [x8, #336]
+; CHECK-i64-NEXT:    addvl x8, sp, #4
+; CHECK-i64-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q1, [x8, #352]
+; CHECK-i64-NEXT:    addvl x8, sp, #4
+; CHECK-i64-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q1, [x8, #368]
+; CHECK-i64-NEXT:    addvl x8, sp, #4
+; CHECK-i64-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q1, [x8, #384]
+; CHECK-i64-NEXT:    mov v0.16b, v1.16b
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #256
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #256
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT:    str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #256
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #256
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT:    str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #256
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #160] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #256
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #256
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #240] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #256
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    ldr z2, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    ldr z4, [x8, #2, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    ldr z6, [x8, #3, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov z3.d, z2.d
+; CHECK-i64-NEXT:    mov z5.d, z4.d
+; CHECK-i64-NEXT:    mov z7.d, z6.d
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT:    ext z3.b, z3.b, z2.b, #16
+; CHECK-i64-NEXT:    ext z5.b, z5.b, z4.b, #16
+; CHECK-i64-NEXT:    ext z7.b, z7.b, z6.b, #16
+; CHECK-i64-NEXT:    // kill: def $q2 killed $q2 killed $z2
+; CHECK-i64-NEXT:    // kill: def $q4 killed $q4 killed $z4
+; CHECK-i64-NEXT:    // kill: def $q3 killed $q3 killed $z3
+; CHECK-i64-NEXT:    // kill: def $q5 killed $q5 killed $z5
+; CHECK-i64-NEXT:    // kill: def $q6 killed $q6 killed $z6
+; CHECK-i64-NEXT:    // kill: def $q7 killed $q7 killed $z7
+; CHECK-i64-NEXT:    mov z1.d, z0.d
+; CHECK-i64-NEXT:    ext z1.b, z1.b, z0.b, #16
+; CHECK-i64-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-i64-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; CHECK-i64-NEXT:    addvl sp, sp, #4
+; CHECK-i64-NEXT:    add sp, sp, #256
+; CHECK-i64-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-i64-NEXT:    ret
+  %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128> %x)
+  ret <16 x iXLen> %a
+}
+declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>)
+
+define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) {
+; CHECK-i32-LABEL: lrint_v32fp128:
+; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    sub sp, sp, #528
+; CHECK-i32-NEXT:    .cfi_def_cfa_offset 624
+; CHECK-i32-NEXT:    .cfi_offset w19, -8
+; CHECK-i32-NEXT:    .cfi_offset w20, -16
+; CHECK-i32-NEXT:    .cfi_offset w21, -24
+; CHECK-i32-NEXT:    .cfi_offset w22, -32
+; CHECK-i32-NEXT:    .cfi_offset w23, -40
+; CHECK-i32-NEXT:    .cfi_offset w24, -48
+; CHECK-i32-NEXT:    .cfi_offset w25, -56
+; CHECK-i32-NEXT:    .cfi_offset w26, -64
+; CHECK-i32-NEXT:    .cfi_offset w27, -72
+; CHECK-i32-NEXT:    .cfi_offset w28, -80
+; CHECK-i32-NEXT:    .cfi_offset w30, -88
+; CHECK-i32-NEXT:    .cfi_offset w29, -96
+; CHECK-i32-NEXT:    stp q2, q1, [sp, #368] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #624]
+; CHECK-i32-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    str q1, [sp, #160] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #640]
+; CHECK-i32-NEXT:    str q7, [sp, #208] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    str q1, [sp, #128] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #656]
+; CHECK-i32-NEXT:    str q6, [sp, #240] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    str q1, [sp, #96] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #672]
+; CHECK-i32-NEXT:    str q5, [sp, #272] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    str q1, [sp, #80] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #688]
+; CHECK-i32-NEXT:    str q4, [sp, #304] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #704]
+; CHECK-i32-NEXT:    str q3, [sp, #336] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #720]
+; CHECK-i32-NEXT:    str q1, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #736]
+; CHECK-i32-NEXT:    str q1, [sp, #176] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #752]
+; CHECK-i32-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #768]
+; CHECK-i32-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #784]
+; CHECK-i32-NEXT:    str q1, [sp, #192] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #800]
+; CHECK-i32-NEXT:    str q1, [sp, #288] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #816]
+; CHECK-i32-NEXT:    str q1, [sp, #144] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #832]
+; CHECK-i32-NEXT:    str q1, [sp, #256] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #848]
+; CHECK-i32-NEXT:    str q1, [sp, #352] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #864]
+; CHECK-i32-NEXT:    str q1, [sp, #416] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #880]
+; CHECK-i32-NEXT:    str q1, [sp, #320] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #896]
+; CHECK-i32-NEXT:    str q1, [sp, #400] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #912]
+; CHECK-i32-NEXT:    str q1, [sp, #448] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #928]
+; CHECK-i32-NEXT:    str q1, [sp, #480] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #944]
+; CHECK-i32-NEXT:    str q1, [sp, #432] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #960]
+; CHECK-i32-NEXT:    str q1, [sp, #464] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #976]
+; CHECK-i32-NEXT:    str q1, [sp, #496] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #992]
+; CHECK-i32-NEXT:    mov v0.16b, v1.16b
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #524] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #496] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #464] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #432] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #480] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #448] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #400] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #320] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #352] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #416] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #256] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #352] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #256] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #288] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #144] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #288] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #192] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #64] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #16] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #176] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #112] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w29, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w21, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #80] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    str w0, [sp, #96] // 4-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w23, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w24, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w28, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w19, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #304] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w25, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #336] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w26, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w20, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w22, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov w27, w0
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr s4, [sp, #16] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    ldr w8, [sp, #64] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    fmov s2, w24
+; CHECK-i32-NEXT:    ldr s5, [sp, #144] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    ldr s6, [sp, #320] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    fmov s3, w21
+; CHECK-i32-NEXT:    mov v4.s[1], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #256] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    ldr s7, [sp, #432] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    mov v2.s[1], w23
+; CHECK-i32-NEXT:    fmov s1, w26
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    mov v5.s[1], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #400] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    mov v3.s[1], w29
+; CHECK-i32-NEXT:    mov v6.s[1], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #464] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    mov v1.s[1], w25
+; CHECK-i32-NEXT:    mov v0.s[1], w27
+; CHECK-i32-NEXT:    mov v7.s[1], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #96] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    mov v2.s[2], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #112] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    mov v1.s[2], w19
+; CHECK-i32-NEXT:    mov v0.s[2], w22
+; CHECK-i32-NEXT:    mov v3.s[2], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #192] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    mov v4.s[2], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #352] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    mov v1.s[3], w28
+; CHECK-i32-NEXT:    mov v0.s[3], w20
+; CHECK-i32-NEXT:    mov v5.s[2], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #448] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    mov v6.s[2], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #496] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    mov v7.s[2], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #80] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    mov v2.s[3], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #176] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    mov v3.s[3], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #288] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    mov v4.s[3], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #416] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    mov v5.s[3], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #480] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    mov v6.s[3], w8
+; CHECK-i32-NEXT:    ldr w8, [sp, #524] // 4-byte Folded Reload
+; CHECK-i32-NEXT:    mov v7.s[3], w8
+; CHECK-i32-NEXT:    add sp, sp, #528
+; CHECK-i32-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ldp x29, x30, [sp], #96 // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ret
+;
+; CHECK-i64-LABEL: lrint_v32fp128:
+; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    str x29, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-i64-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    sub sp, sp, #512
+; CHECK-i64-NEXT:    addvl sp, sp, #-8
+; CHECK-i64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xa0, 0x04, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 544 + 64 * VG
+; CHECK-i64-NEXT:    .cfi_offset w19, -8
+; CHECK-i64-NEXT:    .cfi_offset w30, -16
+; CHECK-i64-NEXT:    .cfi_offset w29, -32
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    stp q2, q1, [sp, #16] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    mov x19, x8
+; CHECK-i64-NEXT:    stp q0, q7, [sp, #48] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #864]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q6, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #880]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    stp q5, q4, [sp, #128] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #896]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #912]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #800]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #816]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #832]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #848]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #736]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #368] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #752]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #768]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #336] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #784]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #320] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #672]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #304] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #688]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #704]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #720]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #608]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #624]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #640]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #656]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #544]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #560]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #576]
+; CHECK-i64-NEXT:    addvl x9, sp, #8
+; CHECK-i64-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [x9, #592]
+; CHECK-i64-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    mov v0.16b, v3.16b
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str z0, [x8, #7, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr z1, [x8, #7, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT:    str z0, [x8, #7, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str z0, [x8, #6, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr z1, [x8, #6, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT:    str z0, [x8, #6, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #144] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str z0, [x8, #5, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #160] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr z1, [x8, #5, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT:    str z0, [x8, #5, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str z0, [x8, #4, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #224] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr z1, [x8, #4, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT:    str z0, [x8, #4, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #256] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #256] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #288] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #304] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #288] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr z1, [x8, #3, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT:    str z0, [x8, #3, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #320] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #336] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #320] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #352] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #352] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr z1, [x8, #2, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT:    str z0, [x8, #2, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #384] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #416] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #448] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #480] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    add x9, sp, #512
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov x8, #28 // =0x1c
+; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
+; CHECK-i64-NEXT:    ptrue p0.d, vl4
+; CHECK-i64-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-i64-NEXT:    mov x8, #24 // =0x18
+; CHECK-i64-NEXT:    ldr z0, [x9, #1, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-i64-NEXT:    mov x8, #20 // =0x14
+; CHECK-i64-NEXT:    ldr z0, [x9, #2, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-i64-NEXT:    mov x8, #16 // =0x10
+; CHECK-i64-NEXT:    ldr z0, [x9, #3, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-i64-NEXT:    mov x8, #12 // =0xc
+; CHECK-i64-NEXT:    ldr z0, [x9, #4, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-i64-NEXT:    mov x8, #8 // =0x8
+; CHECK-i64-NEXT:    ldr z0, [x9, #5, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-i64-NEXT:    mov x8, #4 // =0x4
+; CHECK-i64-NEXT:    ldr z0, [x9, #6, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    st1d { z0.d }, p0, [x19, x8, lsl #3]
+; CHECK-i64-NEXT:    add x8, sp, #512
+; CHECK-i64-NEXT:    ldr z0, [x8, #7, mul vl] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    st1d { z0.d }, p0, [x19]
+; CHECK-i64-NEXT:    addvl sp, sp, #8
+; CHECK-i64-NEXT:    add sp, sp, #512
+; CHECK-i64-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    ldr x29, [sp], #32 // 8-byte Folded Reload
+; CHECK-i64-NEXT:    ret
+  %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128> %x)
+  ret <32 x iXLen> %a
+}
+declare <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128>)
diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll
index 5503de2b4c5db..9e6f46df05fec 100644
--- a/llvm/test/CodeGen/AArch64/vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll
@@ -674,3 +674,519 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) {
   ret <32 x i64> %a
 }
 declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>)
+
+define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
+; CHECK-LABEL: llrint_v1i64_v1f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x)
+  ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>)
+
+define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
+; CHECK-LABEL: llrint_v2i64_v2f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
+  %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x)
+  ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>)
+
+define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
+; CHECK-LABEL: llrint_v4i64_v4f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    stp q3, q2, [sp, #32] // 32-byte Folded Spill
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d1, x0
+; CHECK-NEXT:    ldp q0, q4, [sp, #16] // 32-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT:    mov v1.d[1], v4.d[0]
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
+  %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x)
+  ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>)
+
+define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
+; CHECK-LABEL: llrint_v8i64_v8f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #144
+; CHECK-NEXT:    str x30, [sp, #128] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 144
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    stp q3, q2, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT:    stp q5, q4, [sp, #48] // 32-byte Folded Spill
+; CHECK-NEXT:    stp q7, q6, [sp, #96] // 32-byte Folded Spill
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d3, x0
+; CHECK-NEXT:    ldp q0, q1, [sp, #80] // 32-byte Folded Reload
+; CHECK-NEXT:    ldr q2, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #128] // 8-byte Folded Reload
+; CHECK-NEXT:    mov v3.d[1], v1.d[0]
+; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #144
+; CHECK-NEXT:    ret
+  %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x)
+  ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>)
+
+define <16 x i64> @llrint_v16f128(<16 x fp128> %x) {
+; CHECK-LABEL: llrint_v16f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #272
+; CHECK-NEXT:    stp x29, x30, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 272
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    str q2, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q2, [sp, #368]
+; CHECK-NEXT:    stp q0, q3, [sp] // 32-byte Folded Spill
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    str q2, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q2, [sp, #384]
+; CHECK-NEXT:    stp q5, q7, [sp, #32] // 32-byte Folded Spill
+; CHECK-NEXT:    str q2, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q2, [sp, #336]
+; CHECK-NEXT:    str q2, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q2, [sp, #352]
+; CHECK-NEXT:    str q2, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q2, [sp, #304]
+; CHECK-NEXT:    str q2, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q2, [sp, #320]
+; CHECK-NEXT:    stp q4, q2, [sp, #112] // 32-byte Folded Spill
+; CHECK-NEXT:    ldr q2, [sp, #272]
+; CHECK-NEXT:    stp q6, q2, [sp, #80] // 32-byte Folded Spill
+; CHECK-NEXT:    ldr q2, [sp, #288]
+; CHECK-NEXT:    str q2, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d7, x0
+; CHECK-NEXT:    ldp q0, q1, [sp, #208] // 32-byte Folded Reload
+; CHECK-NEXT:    ldp q4, q2, [sp, #96] // 32-byte Folded Reload
+; CHECK-NEXT:    ldr q3, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp, #256] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr q6, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v7.d[1], v1.d[0]
+; CHECK-NEXT:    ldp q5, q1, [sp, #144] // 32-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #272
+; CHECK-NEXT:    ret
+  %a = call <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128> %x)
+  ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128>)
+
+define <32 x i64> @llrint_v32f128(<32 x fp128> %x) {
+; CHECK-LABEL: llrint_v32f128:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #512
+; CHECK-NEXT:    .cfi_def_cfa_offset 544
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    .cfi_offset w29, -32
+; CHECK-NEXT:    str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #896]
+; CHECK-NEXT:    mov x19, x8
+; CHECK-NEXT:    str q7, [sp, #272] // 16-byte Folded Spill
+; CHECK-NEXT:    str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #912]
+; CHECK-NEXT:    str q6, [sp, #320] // 16-byte Folded Spill
+; CHECK-NEXT:    str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #864]
+; CHECK-NEXT:    stp q3, q5, [sp, #16] // 32-byte Folded Spill
+; CHECK-NEXT:    str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #880]
+; CHECK-NEXT:    stp q2, q0, [sp, #416] // 32-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #832]
+; CHECK-NEXT:    str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #848]
+; CHECK-NEXT:    stp q4, q0, [sp, #368] // 32-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #800]
+; CHECK-NEXT:    str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #816]
+; CHECK-NEXT:    str q0, [sp, #336] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #768]
+; CHECK-NEXT:    str q0, [sp, #304] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #784]
+; CHECK-NEXT:    str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #736]
+; CHECK-NEXT:    str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #752]
+; CHECK-NEXT:    str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #704]
+; CHECK-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #720]
+; CHECK-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #672]
+; CHECK-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #688]
+; CHECK-NEXT:    str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #640]
+; CHECK-NEXT:    str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #656]
+; CHECK-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #608]
+; CHECK-NEXT:    str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #624]
+; CHECK-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #576]
+; CHECK-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #592]
+; CHECK-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #544]
+; CHECK-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #560]
+; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #368] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #272] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #320] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #272] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #256] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #240] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #288] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #304] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #288] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #304] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #336] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #336] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #352] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #336] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #384] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #432] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-NEXT:    str q0, [x19, #208]
+; CHECK-NEXT:    fmov d0, x0
+; CHECK-NEXT:    ldr q1, [sp, #480] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ldr q1, [sp, #352] // 16-byte Folded Reload
+; CHECK-NEXT:    str q1, [x19, #192]
+; CHECK-NEXT:    ldr q1, [sp, #304] // 16-byte Folded Reload
+; CHECK-NEXT:    str q1, [x19, #176]
+; CHECK-NEXT:    ldr q1, [sp, #256] // 16-byte Folded Reload
+; CHECK-NEXT:    str q0, [x19, #240]
+; CHECK-NEXT:    str q1, [x19, #160]
+; CHECK-NEXT:    ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-NEXT:    str q0, [x19, #224]
+; CHECK-NEXT:    ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-NEXT:    str q0, [x19, #144]
+; CHECK-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT:    str q0, [x19, #128]
+; CHECK-NEXT:    ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    str q0, [x19, #112]
+; CHECK-NEXT:    ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT:    str q0, [x19, #96]
+; CHECK-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-NEXT:    str q0, [x19, #80]
+; CHECK-NEXT:    ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-NEXT:    str q0, [x19, #64]
+; CHECK-NEXT:    ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-NEXT:    str q0, [x19, #48]
+; CHECK-NEXT:    ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-NEXT:    str q0, [x19, #32]
+; CHECK-NEXT:    ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-NEXT:    str q0, [x19, #16]
+; CHECK-NEXT:    ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-NEXT:    str q0, [x19]
+; CHECK-NEXT:    add sp, sp, #512
+; CHECK-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x29, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %a = call <32 x i64> @llvm.llrint.v32i64.v32f128(<32 x fp128> %x)
+  ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.llrint.v32i64.v32f128(<32 x fp128>)
diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index 602643264e7be..cb7fe14273a42 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -1335,3 +1335,951 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
   ret <32 x iXLen> %a
 }
 declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>)
+
+define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
+; CHECK-i32-LABEL: lrint_v1fp128:
+; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-i32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i32-NEXT:    .cfi_offset w30, -16
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-i32-NEXT:    ret
+;
+; CHECK-i64-LABEL: lrint_v1fp128:
+; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-i64-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-i64-NEXT:    .cfi_offset w30, -16
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-i64-NEXT:    ret
+  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x)
+  ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>)
+
+define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
+; CHECK-i32-LABEL: lrint_v2fp128:
+; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    sub sp, sp, #48
+; CHECK-i32-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-i32-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-i32-NEXT:    .cfi_offset w30, -16
+; CHECK-i32-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-i32-NEXT:    add sp, sp, #48
+; CHECK-i32-NEXT:    ret
+;
+; CHECK-i64-LABEL: lrint_v2fp128:
+; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    sub sp, sp, #48
+; CHECK-i64-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-i64-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-i64-NEXT:    .cfi_offset w30, -16
+; CHECK-i64-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    mov v0.16b, v1.16b
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    add sp, sp, #48
+; CHECK-i64-NEXT:    ret
+  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x)
+  ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>)
+
+define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
+; CHECK-i32-LABEL: lrint_v4fp128:
+; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    sub sp, sp, #80
+; CHECK-i32-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-i32-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-i32-NEXT:    .cfi_offset w30, -16
+; CHECK-i32-NEXT:    stp q2, q3, [sp, #16] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w0
+; CHECK-i32-NEXT:    add sp, sp, #80
+; CHECK-i32-NEXT:    ret
+;
+; CHECK-i64-LABEL: lrint_v4fp128:
+; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    sub sp, sp, #80
+; CHECK-i64-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; CHECK-i64-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-i64-NEXT:    .cfi_offset w30, -16
+; CHECK-i64-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    mov v0.16b, v1.16b
+; CHECK-i64-NEXT:    stp q3, q2, [sp, #32] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d1, x0
+; CHECK-i64-NEXT:    ldp q0, q4, [sp, #16] // 32-byte Folded Reload
+; CHECK-i64-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; CHECK-i64-NEXT:    mov v1.d[1], v4.d[0]
+; CHECK-i64-NEXT:    add sp, sp, #80
+; CHECK-i64-NEXT:    ret
+  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x)
+  ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>)
+
+define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
+; CHECK-i32-LABEL: lrint_v8fp128:
+; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    sub sp, sp, #144
+; CHECK-i32-NEXT:    str x30, [sp, #128] // 8-byte Folded Spill
+; CHECK-i32-NEXT:    .cfi_def_cfa_offset 144
+; CHECK-i32-NEXT:    .cfi_offset w30, -16
+; CHECK-i32-NEXT:    stp q1, q2, [sp] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    stp q3, q5, [sp, #32] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    stp q6, q7, [sp, #64] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    str q4, [sp, #96] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w0
+; CHECK-i32-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldp q1, q0, [sp, #96] // 32-byte Folded Reload
+; CHECK-i32-NEXT:    ldr x30, [sp, #128] // 8-byte Folded Reload
+; CHECK-i32-NEXT:    mov v1.s[3], w0
+; CHECK-i32-NEXT:    add sp, sp, #144
+; CHECK-i32-NEXT:    ret
+;
+; CHECK-i64-LABEL: lrint_v8fp128:
+; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    sub sp, sp, #144
+; CHECK-i64-NEXT:    str x30, [sp, #128] // 8-byte Folded Spill
+; CHECK-i64-NEXT:    .cfi_def_cfa_offset 144
+; CHECK-i64-NEXT:    .cfi_offset w30, -16
+; CHECK-i64-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    mov v0.16b, v1.16b
+; CHECK-i64-NEXT:    stp q3, q2, [sp, #16] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    stp q5, q4, [sp, #48] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    stp q7, q6, [sp, #96] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d3, x0
+; CHECK-i64-NEXT:    ldp q0, q1, [sp, #80] // 32-byte Folded Reload
+; CHECK-i64-NEXT:    ldr q2, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    ldr x30, [sp, #128] // 8-byte Folded Reload
+; CHECK-i64-NEXT:    mov v3.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    add sp, sp, #144
+; CHECK-i64-NEXT:    ret
+  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x)
+  ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>)
+
+define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) {
+; CHECK-i32-LABEL: lrint_v16fp128:
+; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    sub sp, sp, #272
+; CHECK-i32-NEXT:    stp x29, x30, [sp, #256] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    .cfi_def_cfa_offset 272
+; CHECK-i32-NEXT:    .cfi_offset w30, -8
+; CHECK-i32-NEXT:    .cfi_offset w29, -16
+; CHECK-i32-NEXT:    stp q1, q2, [sp] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #384]
+; CHECK-i32-NEXT:    stp q3, q5, [sp, #32] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    str q1, [sp, #176] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #368]
+; CHECK-i32-NEXT:    stp q7, q4, [sp, #208] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    str q1, [sp, #160] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #352]
+; CHECK-i32-NEXT:    str q1, [sp, #144] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #336]
+; CHECK-i32-NEXT:    str q1, [sp, #192] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #320]
+; CHECK-i32-NEXT:    str q1, [sp, #128] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #304]
+; CHECK-i32-NEXT:    str q1, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #288]
+; CHECK-i32-NEXT:    stp q6, q1, [sp, #80] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #272]
+; CHECK-i32-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w0
+; CHECK-i32-NEXT:    str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w0
+; CHECK-i32-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w0
+; CHECK-i32-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldp q3, q2, [sp, #192] // 32-byte Folded Reload
+; CHECK-i32-NEXT:    ldp q1, q0, [sp, #224] // 32-byte Folded Reload
+; CHECK-i32-NEXT:    ldp x29, x30, [sp, #256] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v3.s[3], w0
+; CHECK-i32-NEXT:    add sp, sp, #272
+; CHECK-i32-NEXT:    ret
+;
+; CHECK-i64-LABEL: lrint_v16fp128:
+; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    sub sp, sp, #272
+; CHECK-i64-NEXT:    stp x29, x30, [sp, #256] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    .cfi_def_cfa_offset 272
+; CHECK-i64-NEXT:    .cfi_offset w30, -8
+; CHECK-i64-NEXT:    .cfi_offset w29, -16
+; CHECK-i64-NEXT:    str q2, [sp, #160] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q2, [sp, #368]
+; CHECK-i64-NEXT:    stp q0, q3, [sp] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    mov v0.16b, v1.16b
+; CHECK-i64-NEXT:    str q2, [sp, #240] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q2, [sp, #384]
+; CHECK-i64-NEXT:    stp q5, q7, [sp, #32] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    str q2, [sp, #224] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q2, [sp, #336]
+; CHECK-i64-NEXT:    str q2, [sp, #192] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q2, [sp, #352]
+; CHECK-i64-NEXT:    str q2, [sp, #176] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q2, [sp, #304]
+; CHECK-i64-NEXT:    str q2, [sp, #144] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q2, [sp, #320]
+; CHECK-i64-NEXT:    stp q4, q2, [sp, #112] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q2, [sp, #272]
+; CHECK-i64-NEXT:    stp q6, q2, [sp, #80] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q2, [sp, #288]
+; CHECK-i64-NEXT:    str q2, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #208] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #176] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d7, x0
+; CHECK-i64-NEXT:    ldp q0, q1, [sp, #208] // 32-byte Folded Reload
+; CHECK-i64-NEXT:    ldp q4, q2, [sp, #96] // 32-byte Folded Reload
+; CHECK-i64-NEXT:    ldr q3, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    ldp x29, x30, [sp, #256] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    ldr q6, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v7.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldp q5, q1, [sp, #144] // 32-byte Folded Reload
+; CHECK-i64-NEXT:    add sp, sp, #272
+; CHECK-i64-NEXT:    ret
+  %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128> %x)
+  ret <16 x iXLen> %a
+}
+declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>)
+
+define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) {
+; CHECK-i32-LABEL: lrint_v32fp128:
+; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-i32-NEXT:    sub sp, sp, #512
+; CHECK-i32-NEXT:    .cfi_def_cfa_offset 528
+; CHECK-i32-NEXT:    .cfi_offset w30, -8
+; CHECK-i32-NEXT:    .cfi_offset w29, -16
+; CHECK-i32-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #896]
+; CHECK-i32-NEXT:    stp q2, q3, [sp, #16] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    str q1, [sp, #368] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #880]
+; CHECK-i32-NEXT:    stp q7, q4, [sp, #464] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    str q1, [sp, #352] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #864]
+; CHECK-i32-NEXT:    str q6, [sp, #112] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    str q1, [sp, #336] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #848]
+; CHECK-i32-NEXT:    str q5, [sp, #80] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    str q1, [sp, #384] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #832]
+; CHECK-i32-NEXT:    str q1, [sp, #320] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #816]
+; CHECK-i32-NEXT:    str q1, [sp, #304] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #800]
+; CHECK-i32-NEXT:    str q1, [sp, #288] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #784]
+; CHECK-i32-NEXT:    str q1, [sp, #400] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #768]
+; CHECK-i32-NEXT:    str q1, [sp, #272] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #752]
+; CHECK-i32-NEXT:    str q1, [sp, #256] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #736]
+; CHECK-i32-NEXT:    str q1, [sp, #240] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #720]
+; CHECK-i32-NEXT:    str q1, [sp, #416] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #704]
+; CHECK-i32-NEXT:    str q1, [sp, #224] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #688]
+; CHECK-i32-NEXT:    str q1, [sp, #208] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #672]
+; CHECK-i32-NEXT:    str q1, [sp, #192] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #656]
+; CHECK-i32-NEXT:    str q1, [sp, #432] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #640]
+; CHECK-i32-NEXT:    str q1, [sp, #176] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #624]
+; CHECK-i32-NEXT:    str q1, [sp, #160] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #608]
+; CHECK-i32-NEXT:    str q1, [sp, #144] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #592]
+; CHECK-i32-NEXT:    str q1, [sp, #448] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #576]
+; CHECK-i32-NEXT:    str q1, [sp, #128] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #560]
+; CHECK-i32-NEXT:    str q1, [sp, #96] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #544]
+; CHECK-i32-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q1, [sp, #528]
+; CHECK-i32-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w0
+; CHECK-i32-NEXT:    str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w0
+; CHECK-i32-NEXT:    str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w0
+; CHECK-i32-NEXT:    str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w0
+; CHECK-i32-NEXT:    str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w0
+; CHECK-i32-NEXT:    str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #256] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w0
+; CHECK-i32-NEXT:    str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #288] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #304] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[3], w0
+; CHECK-i32-NEXT:    str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    fmov s0, w0
+; CHECK-i32-NEXT:    str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #336] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[1], w0
+; CHECK-i32-NEXT:    str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #352] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    mov v0.s[2], w0
+; CHECK-i32-NEXT:    str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-i32-NEXT:    bl lrintl
+; CHECK-i32-NEXT:    ldp q7, q6, [sp, #384] // 32-byte Folded Reload
+; CHECK-i32-NEXT:    ldp q1, q0, [sp, #480] // 32-byte Folded Reload
+; CHECK-i32-NEXT:    ldp q3, q2, [sp, #448] // 32-byte Folded Reload
+; CHECK-i32-NEXT:    ldp q5, q4, [sp, #416] // 32-byte Folded Reload
+; CHECK-i32-NEXT:    mov v7.s[3], w0
+; CHECK-i32-NEXT:    add sp, sp, #512
+; CHECK-i32-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-i32-NEXT:    ret
+;
+; CHECK-i64-LABEL: lrint_v32fp128:
+; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    str x29, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-i64-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    sub sp, sp, #512
+; CHECK-i64-NEXT:    .cfi_def_cfa_offset 544
+; CHECK-i64-NEXT:    .cfi_offset w19, -8
+; CHECK-i64-NEXT:    .cfi_offset w30, -16
+; CHECK-i64-NEXT:    .cfi_offset w29, -32
+; CHECK-i64-NEXT:    str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #896]
+; CHECK-i64-NEXT:    mov x19, x8
+; CHECK-i64-NEXT:    str q7, [sp, #272] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    str q0, [sp, #496] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #912]
+; CHECK-i64-NEXT:    str q6, [sp, #320] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #864]
+; CHECK-i64-NEXT:    stp q3, q5, [sp, #16] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #880]
+; CHECK-i64-NEXT:    stp q2, q0, [sp, #416] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #832]
+; CHECK-i64-NEXT:    str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #848]
+; CHECK-i64-NEXT:    stp q4, q0, [sp, #368] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #800]
+; CHECK-i64-NEXT:    str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #816]
+; CHECK-i64-NEXT:    str q0, [sp, #336] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #768]
+; CHECK-i64-NEXT:    str q0, [sp, #304] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #784]
+; CHECK-i64-NEXT:    str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #736]
+; CHECK-i64-NEXT:    str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #752]
+; CHECK-i64-NEXT:    str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #704]
+; CHECK-i64-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #720]
+; CHECK-i64-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #672]
+; CHECK-i64-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #688]
+; CHECK-i64-NEXT:    str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #640]
+; CHECK-i64-NEXT:    str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #656]
+; CHECK-i64-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #608]
+; CHECK-i64-NEXT:    str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #624]
+; CHECK-i64-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #576]
+; CHECK-i64-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #592]
+; CHECK-i64-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #544]
+; CHECK-i64-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #560]
+; CHECK-i64-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    mov v0.16b, v1.16b
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #464] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #416] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #368] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #272] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #320] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #272] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #272] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #80] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #176] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #112] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #144] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #128] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #128] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #192] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #208] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #240] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #240] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #256] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #240] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #256] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #288] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #288] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #304] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #288] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #304] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #336] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #336] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #352] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #336] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #352] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #384] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #384] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #384] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #400] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #432] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #432] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #432] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    str q0, [sp, #448] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #480] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    str q0, [sp, #480] // 16-byte Folded Spill
+; CHECK-i64-NEXT:    ldr q0, [sp, #496] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    bl lrintl
+; CHECK-i64-NEXT:    ldr q0, [sp, #400] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    str q0, [x19, #208]
+; CHECK-i64-NEXT:    fmov d0, x0
+; CHECK-i64-NEXT:    ldr q1, [sp, #480] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-i64-NEXT:    ldr q1, [sp, #352] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    str q1, [x19, #192]
+; CHECK-i64-NEXT:    ldr q1, [sp, #304] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    str q1, [x19, #176]
+; CHECK-i64-NEXT:    ldr q1, [sp, #256] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    str q0, [x19, #240]
+; CHECK-i64-NEXT:    str q1, [x19, #160]
+; CHECK-i64-NEXT:    ldr q0, [sp, #448] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    str q0, [x19, #224]
+; CHECK-i64-NEXT:    ldr q0, [sp, #208] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    str q0, [x19, #144]
+; CHECK-i64-NEXT:    ldr q0, [sp, #160] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    str q0, [x19, #128]
+; CHECK-i64-NEXT:    ldr q0, [sp, #144] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    str q0, [x19, #112]
+; CHECK-i64-NEXT:    ldr q0, [sp, #176] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    str q0, [x19, #96]
+; CHECK-i64-NEXT:    ldr q0, [sp, #224] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    str q0, [x19, #80]
+; CHECK-i64-NEXT:    ldr q0, [sp, #272] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    str q0, [x19, #64]
+; CHECK-i64-NEXT:    ldr q0, [sp, #320] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    str q0, [x19, #48]
+; CHECK-i64-NEXT:    ldr q0, [sp, #368] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    str q0, [x19, #32]
+; CHECK-i64-NEXT:    ldr q0, [sp, #416] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    str q0, [x19, #16]
+; CHECK-i64-NEXT:    ldr q0, [sp, #464] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    str q0, [x19]
+; CHECK-i64-NEXT:    add sp, sp, #512
+; CHECK-i64-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-i64-NEXT:    ldr x29, [sp], #32 // 8-byte Folded Reload
+; CHECK-i64-NEXT:    ret
+  %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128> %x) ; was .v16fp128 — mangled suffix must encode the <32 x fp128> overload and match the declare below
+  ret <32 x iXLen> %a
+}
+declare <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-i32-GI: {{.*}}
diff --git a/llvm/test/CodeGen/PowerPC/vector-llrint.ll b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
index 7085cf51916da..d57bf6b2e706c 100644
--- a/llvm/test/CodeGen/PowerPC/vector-llrint.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
@@ -4836,3 +4836,599 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
   ret <8 x i64> %a
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
+
+define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
+; BE-LABEL: llrint_v1i64_v1f128:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
+; BE-NEXT:    .cfi_def_cfa_offset 112
+; BE-NEXT:    .cfi_offset lr, 16
+; BE-NEXT:    bl llrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    addi r1, r1, 112
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+;
+; CHECK-LABEL: llrint_v1i64_v1f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    bl llrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+;
+; FAST-LABEL: llrint_v1i64_v1f128:
+; FAST:       # %bb.0:
+; FAST-NEXT:    mflr r0
+; FAST-NEXT:    stdu r1, -32(r1)
+; FAST-NEXT:    std r0, 48(r1)
+; FAST-NEXT:    .cfi_def_cfa_offset 32
+; FAST-NEXT:    .cfi_offset lr, 16
+; FAST-NEXT:    bl llrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    addi r1, r1, 32
+; FAST-NEXT:    ld r0, 16(r1)
+; FAST-NEXT:    mtlr r0
+; FAST-NEXT:    blr
+  %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x)
+  ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>)
+
+define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
+; BE-LABEL: llrint_v2i64_v2f128:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -160(r1)
+; BE-NEXT:    std r0, 176(r1)
+; BE-NEXT:    .cfi_def_cfa_offset 160
+; BE-NEXT:    .cfi_offset lr, 16
+; BE-NEXT:    .cfi_offset v31, -16
+; BE-NEXT:    li r3, 144
+; BE-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    vmr v31, v2
+; BE-NEXT:    vmr v2, v3
+; BE-NEXT:    bl llrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v31
+; BE-NEXT:    std r3, 136(r1)
+; BE-NEXT:    bl llrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 128(r1)
+; BE-NEXT:    addi r3, r1, 128
+; BE-NEXT:    lxvd2x v2, 0, r3
+; BE-NEXT:    li r3, 144
+; BE-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 160
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+;
+; CHECK-LABEL: llrint_v2i64_v2f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -80(r1)
+; CHECK-NEXT:    std r0, 96(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset v30, -32
+; CHECK-NEXT:    .cfi_offset v31, -16
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    vmr v31, v3
+; CHECK-NEXT:    bl llrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v31
+; CHECK-NEXT:    mtvsrd v30, r3
+; CHECK-NEXT:    bl llrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    xxmrghd v2, vs0, v30
+; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    addi r1, r1, 80
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+;
+; FAST-LABEL: llrint_v2i64_v2f128:
+; FAST:       # %bb.0:
+; FAST-NEXT:    mflr r0
+; FAST-NEXT:    stdu r1, -80(r1)
+; FAST-NEXT:    std r0, 96(r1)
+; FAST-NEXT:    .cfi_def_cfa_offset 80
+; FAST-NEXT:    .cfi_offset lr, 16
+; FAST-NEXT:    .cfi_offset v30, -32
+; FAST-NEXT:    .cfi_offset v31, -16
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    vmr v31, v3
+; FAST-NEXT:    bl llrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v31
+; FAST-NEXT:    mtvsrd v30, r3
+; FAST-NEXT:    bl llrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    xxmrghd v2, vs0, v30
+; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 80
+; FAST-NEXT:    ld r0, 16(r1)
+; FAST-NEXT:    mtlr r0
+; FAST-NEXT:    blr
+  %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x)
+  ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>)
+
+define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
+; BE-LABEL: llrint_v4i64_v4f128:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -208(r1)
+; BE-NEXT:    std r0, 224(r1)
+; BE-NEXT:    .cfi_def_cfa_offset 208
+; BE-NEXT:    .cfi_offset lr, 16
+; BE-NEXT:    .cfi_offset v29, -48
+; BE-NEXT:    .cfi_offset v30, -32
+; BE-NEXT:    .cfi_offset v31, -16
+; BE-NEXT:    li r3, 160
+; BE-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 176
+; BE-NEXT:    vmr v29, v2
+; BE-NEXT:    vmr v2, v3
+; BE-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 192
+; BE-NEXT:    vmr v30, v4
+; BE-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    vmr v31, v5
+; BE-NEXT:    bl llrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v29
+; BE-NEXT:    std r3, 136(r1)
+; BE-NEXT:    bl llrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v31
+; BE-NEXT:    std r3, 128(r1)
+; BE-NEXT:    bl llrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v30
+; BE-NEXT:    std r3, 152(r1)
+; BE-NEXT:    bl llrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 144(r1)
+; BE-NEXT:    addi r3, r1, 128
+; BE-NEXT:    lxvd2x v2, 0, r3
+; BE-NEXT:    addi r3, r1, 144
+; BE-NEXT:    lxvd2x v3, 0, r3
+; BE-NEXT:    li r3, 192
+; BE-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 176
+; BE-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 160
+; BE-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 208
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+;
+; CHECK-LABEL: llrint_v4i64_v4f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -112(r1)
+; CHECK-NEXT:    std r0, 128(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset v28, -64
+; CHECK-NEXT:    .cfi_offset v29, -48
+; CHECK-NEXT:    .cfi_offset v30, -32
+; CHECK-NEXT:    .cfi_offset v31, -16
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 80
+; CHECK-NEXT:    vmr v29, v3
+; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 96
+; CHECK-NEXT:    vmr v30, v4
+; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    vmr v31, v5
+; CHECK-NEXT:    bl llrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v29
+; CHECK-NEXT:    mtvsrd v28, r3
+; CHECK-NEXT:    bl llrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    vmr v2, v30
+; CHECK-NEXT:    xxmrghd v29, vs0, v28
+; CHECK-NEXT:    bl llrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v31
+; CHECK-NEXT:    mtvsrd v30, r3
+; CHECK-NEXT:    bl llrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 96
+; CHECK-NEXT:    vmr v2, v29
+; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 80
+; CHECK-NEXT:    xxmrghd v3, vs0, v30
+; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    addi r1, r1, 112
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+;
+; FAST-LABEL: llrint_v4i64_v4f128:
+; FAST:       # %bb.0:
+; FAST-NEXT:    mflr r0
+; FAST-NEXT:    stdu r1, -112(r1)
+; FAST-NEXT:    std r0, 128(r1)
+; FAST-NEXT:    .cfi_def_cfa_offset 112
+; FAST-NEXT:    .cfi_offset lr, 16
+; FAST-NEXT:    .cfi_offset v28, -64
+; FAST-NEXT:    .cfi_offset v29, -48
+; FAST-NEXT:    .cfi_offset v30, -32
+; FAST-NEXT:    .cfi_offset v31, -16
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    vmr v29, v3
+; FAST-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    vmr v30, v4
+; FAST-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    vmr v31, v5
+; FAST-NEXT:    bl llrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v29
+; FAST-NEXT:    mtvsrd v28, r3
+; FAST-NEXT:    bl llrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    vmr v2, v30
+; FAST-NEXT:    xxmrghd v29, vs0, v28
+; FAST-NEXT:    bl llrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v31
+; FAST-NEXT:    mtvsrd v30, r3
+; FAST-NEXT:    bl llrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    vmr v2, v29
+; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    xxmrghd v3, vs0, v30
+; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 112
+; FAST-NEXT:    ld r0, 16(r1)
+; FAST-NEXT:    mtlr r0
+; FAST-NEXT:    blr
+  %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x)
+  ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>)
+
+define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
+; BE-LABEL: llrint_v8i64_v8f128:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -304(r1)
+; BE-NEXT:    std r0, 320(r1)
+; BE-NEXT:    .cfi_def_cfa_offset 304
+; BE-NEXT:    .cfi_offset lr, 16
+; BE-NEXT:    .cfi_offset v25, -112
+; BE-NEXT:    .cfi_offset v26, -96
+; BE-NEXT:    .cfi_offset v27, -80
+; BE-NEXT:    .cfi_offset v28, -64
+; BE-NEXT:    .cfi_offset v29, -48
+; BE-NEXT:    .cfi_offset v30, -32
+; BE-NEXT:    .cfi_offset v31, -16
+; BE-NEXT:    li r3, 192
+; BE-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 208
+; BE-NEXT:    vmr v25, v2
+; BE-NEXT:    vmr v2, v3
+; BE-NEXT:    stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 224
+; BE-NEXT:    vmr v26, v4
+; BE-NEXT:    stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 240
+; BE-NEXT:    vmr v27, v5
+; BE-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 256
+; BE-NEXT:    vmr v28, v6
+; BE-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 272
+; BE-NEXT:    vmr v29, v7
+; BE-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 288
+; BE-NEXT:    vmr v30, v8
+; BE-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    vmr v31, v9
+; BE-NEXT:    bl llrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v25
+; BE-NEXT:    std r3, 136(r1)
+; BE-NEXT:    bl llrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v27
+; BE-NEXT:    std r3, 128(r1)
+; BE-NEXT:    bl llrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v26
+; BE-NEXT:    std r3, 152(r1)
+; BE-NEXT:    bl llrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v29
+; BE-NEXT:    std r3, 144(r1)
+; BE-NEXT:    bl llrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v28
+; BE-NEXT:    std r3, 168(r1)
+; BE-NEXT:    bl llrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v31
+; BE-NEXT:    std r3, 160(r1)
+; BE-NEXT:    bl llrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v30
+; BE-NEXT:    std r3, 184(r1)
+; BE-NEXT:    bl llrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 176(r1)
+; BE-NEXT:    addi r3, r1, 128
+; BE-NEXT:    lxvd2x v2, 0, r3
+; BE-NEXT:    addi r3, r1, 144
+; BE-NEXT:    lxvd2x v3, 0, r3
+; BE-NEXT:    addi r3, r1, 160
+; BE-NEXT:    lxvd2x v4, 0, r3
+; BE-NEXT:    addi r3, r1, 176
+; BE-NEXT:    lxvd2x v5, 0, r3
+; BE-NEXT:    li r3, 288
+; BE-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 272
+; BE-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 256
+; BE-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 240
+; BE-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 224
+; BE-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 208
+; BE-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 192
+; BE-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 304
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+;
+; CHECK-LABEL: llrint_v8i64_v8f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -176(r1)
+; CHECK-NEXT:    std r0, 192(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 176
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset v24, -128
+; CHECK-NEXT:    .cfi_offset v25, -112
+; CHECK-NEXT:    .cfi_offset v26, -96
+; CHECK-NEXT:    .cfi_offset v27, -80
+; CHECK-NEXT:    .cfi_offset v28, -64
+; CHECK-NEXT:    .cfi_offset v29, -48
+; CHECK-NEXT:    .cfi_offset v30, -32
+; CHECK-NEXT:    .cfi_offset v31, -16
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 80
+; CHECK-NEXT:    vmr v25, v3
+; CHECK-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 96
+; CHECK-NEXT:    vmr v26, v4
+; CHECK-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 112
+; CHECK-NEXT:    vmr v27, v5
+; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 128
+; CHECK-NEXT:    vmr v28, v6
+; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 144
+; CHECK-NEXT:    vmr v29, v7
+; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 160
+; CHECK-NEXT:    vmr v30, v8
+; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    vmr v31, v9
+; CHECK-NEXT:    bl llrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v25
+; CHECK-NEXT:    mtvsrd v24, r3
+; CHECK-NEXT:    bl llrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    vmr v2, v26
+; CHECK-NEXT:    xxmrghd v25, vs0, v24
+; CHECK-NEXT:    bl llrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v27
+; CHECK-NEXT:    mtvsrd v26, r3
+; CHECK-NEXT:    bl llrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    vmr v2, v28
+; CHECK-NEXT:    xxmrghd v27, vs0, v26
+; CHECK-NEXT:    bl llrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v29
+; CHECK-NEXT:    mtvsrd v28, r3
+; CHECK-NEXT:    bl llrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    vmr v2, v30
+; CHECK-NEXT:    xxmrghd v29, vs0, v28
+; CHECK-NEXT:    bl llrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v31
+; CHECK-NEXT:    mtvsrd v30, r3
+; CHECK-NEXT:    bl llrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 160
+; CHECK-NEXT:    vmr v4, v29
+; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 144
+; CHECK-NEXT:    vmr v3, v27
+; CHECK-NEXT:    vmr v2, v25
+; CHECK-NEXT:    xxmrghd v5, vs0, v30
+; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 128
+; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 112
+; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 96
+; CHECK-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 80
+; CHECK-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    addi r1, r1, 176
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+;
+; FAST-LABEL: llrint_v8i64_v8f128:
+; FAST:       # %bb.0:
+; FAST-NEXT:    mflr r0
+; FAST-NEXT:    stdu r1, -176(r1)
+; FAST-NEXT:    std r0, 192(r1)
+; FAST-NEXT:    .cfi_def_cfa_offset 176
+; FAST-NEXT:    .cfi_offset lr, 16
+; FAST-NEXT:    .cfi_offset v24, -128
+; FAST-NEXT:    .cfi_offset v25, -112
+; FAST-NEXT:    .cfi_offset v26, -96
+; FAST-NEXT:    .cfi_offset v27, -80
+; FAST-NEXT:    .cfi_offset v28, -64
+; FAST-NEXT:    .cfi_offset v29, -48
+; FAST-NEXT:    .cfi_offset v30, -32
+; FAST-NEXT:    .cfi_offset v31, -16
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    vmr v25, v3
+; FAST-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    vmr v26, v4
+; FAST-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 112
+; FAST-NEXT:    vmr v27, v5
+; FAST-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 128
+; FAST-NEXT:    vmr v28, v6
+; FAST-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 144
+; FAST-NEXT:    vmr v29, v7
+; FAST-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 160
+; FAST-NEXT:    vmr v30, v8
+; FAST-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    vmr v31, v9
+; FAST-NEXT:    bl llrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v25
+; FAST-NEXT:    mtvsrd v24, r3
+; FAST-NEXT:    bl llrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    vmr v2, v26
+; FAST-NEXT:    xxmrghd v25, vs0, v24
+; FAST-NEXT:    bl llrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v27
+; FAST-NEXT:    mtvsrd v26, r3
+; FAST-NEXT:    bl llrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    vmr v2, v28
+; FAST-NEXT:    xxmrghd v27, vs0, v26
+; FAST-NEXT:    bl llrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v29
+; FAST-NEXT:    mtvsrd v28, r3
+; FAST-NEXT:    bl llrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    vmr v2, v30
+; FAST-NEXT:    xxmrghd v29, vs0, v28
+; FAST-NEXT:    bl llrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v31
+; FAST-NEXT:    mtvsrd v30, r3
+; FAST-NEXT:    bl llrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 160
+; FAST-NEXT:    vmr v4, v29
+; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 144
+; FAST-NEXT:    vmr v3, v27
+; FAST-NEXT:    vmr v2, v25
+; FAST-NEXT:    xxmrghd v5, vs0, v30
+; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 128
+; FAST-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 112
+; FAST-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 176
+; FAST-NEXT:    ld r0, 16(r1)
+; FAST-NEXT:    mtlr r0
+; FAST-NEXT:    blr
+  %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x)
+  ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>)
diff --git a/llvm/test/CodeGen/PowerPC/vector-lrint.ll b/llvm/test/CodeGen/PowerPC/vector-lrint.ll
index b2ade5300dbc3..c64c2e15179cb 100644
--- a/llvm/test/CodeGen/PowerPC/vector-lrint.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-lrint.ll
@@ -4851,3 +4851,2327 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) {
   ret <8 x i64> %a
 }
 declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>)
+
+define <1 x i64> @lrint_v1f128(<1 x fp128> %x) {
+; BE-LABEL: lrint_v1f128:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -112(r1)
+; BE-NEXT:    std r0, 128(r1)
+; BE-NEXT:    .cfi_def_cfa_offset 112
+; BE-NEXT:    .cfi_offset lr, 16
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    addi r1, r1, 112
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+;
+; CHECK-LABEL: lrint_v1f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -32(r1)
+; CHECK-NEXT:    std r0, 48(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+;
+; FAST-LABEL: lrint_v1f128:
+; FAST:       # %bb.0:
+; FAST-NEXT:    mflr r0
+; FAST-NEXT:    stdu r1, -32(r1)
+; FAST-NEXT:    std r0, 48(r1)
+; FAST-NEXT:    .cfi_def_cfa_offset 32
+; FAST-NEXT:    .cfi_offset lr, 16
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    addi r1, r1, 32
+; FAST-NEXT:    ld r0, 16(r1)
+; FAST-NEXT:    mtlr r0
+; FAST-NEXT:    blr
+  %a = call <1 x i64> @llvm.lrint.v1i64.v1f128(<1 x fp128> %x)
+  ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.lrint.v1i64.v1f128(<1 x fp128>)
+
+define <2 x i64> @lrint_v2f128(<2 x fp128> %x) {
+; BE-LABEL: lrint_v2f128:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -160(r1)
+; BE-NEXT:    std r0, 176(r1)
+; BE-NEXT:    .cfi_def_cfa_offset 160
+; BE-NEXT:    .cfi_offset lr, 16
+; BE-NEXT:    .cfi_offset v31, -16
+; BE-NEXT:    li r3, 144
+; BE-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    vmr v31, v2
+; BE-NEXT:    vmr v2, v3
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v31
+; BE-NEXT:    std r3, 136(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 128(r1)
+; BE-NEXT:    addi r3, r1, 128
+; BE-NEXT:    lxvd2x v2, 0, r3
+; BE-NEXT:    li r3, 144
+; BE-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 160
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+;
+; CHECK-LABEL: lrint_v2f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -80(r1)
+; CHECK-NEXT:    std r0, 96(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset v30, -32
+; CHECK-NEXT:    .cfi_offset v31, -16
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    vmr v31, v3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v31
+; CHECK-NEXT:    mtvsrd v30, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    xxmrghd v2, vs0, v30
+; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    addi r1, r1, 80
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+;
+; FAST-LABEL: lrint_v2f128:
+; FAST:       # %bb.0:
+; FAST-NEXT:    mflr r0
+; FAST-NEXT:    stdu r1, -80(r1)
+; FAST-NEXT:    std r0, 96(r1)
+; FAST-NEXT:    .cfi_def_cfa_offset 80
+; FAST-NEXT:    .cfi_offset lr, 16
+; FAST-NEXT:    .cfi_offset v30, -32
+; FAST-NEXT:    .cfi_offset v31, -16
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    vmr v31, v3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v31
+; FAST-NEXT:    mtvsrd v30, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    xxmrghd v2, vs0, v30
+; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 80
+; FAST-NEXT:    ld r0, 16(r1)
+; FAST-NEXT:    mtlr r0
+; FAST-NEXT:    blr
+  %a = call <2 x i64> @llvm.lrint.v2i64.v2f128(<2 x fp128> %x)
+  ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.lrint.v2i64.v2f128(<2 x fp128>)
+
+define <4 x i64> @lrint_v4f128(<4 x fp128> %x) {
+; BE-LABEL: lrint_v4f128:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -208(r1)
+; BE-NEXT:    std r0, 224(r1)
+; BE-NEXT:    .cfi_def_cfa_offset 208
+; BE-NEXT:    .cfi_offset lr, 16
+; BE-NEXT:    .cfi_offset v29, -48
+; BE-NEXT:    .cfi_offset v30, -32
+; BE-NEXT:    .cfi_offset v31, -16
+; BE-NEXT:    li r3, 160
+; BE-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 176
+; BE-NEXT:    vmr v29, v2
+; BE-NEXT:    vmr v2, v3
+; BE-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 192
+; BE-NEXT:    vmr v30, v4
+; BE-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    vmr v31, v5
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v29
+; BE-NEXT:    std r3, 136(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v31
+; BE-NEXT:    std r3, 128(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v30
+; BE-NEXT:    std r3, 152(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 144(r1)
+; BE-NEXT:    addi r3, r1, 128
+; BE-NEXT:    lxvd2x v2, 0, r3
+; BE-NEXT:    addi r3, r1, 144
+; BE-NEXT:    lxvd2x v3, 0, r3
+; BE-NEXT:    li r3, 192
+; BE-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 176
+; BE-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 160
+; BE-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 208
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+;
+; CHECK-LABEL: lrint_v4f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -112(r1)
+; CHECK-NEXT:    std r0, 128(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 112
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset v28, -64
+; CHECK-NEXT:    .cfi_offset v29, -48
+; CHECK-NEXT:    .cfi_offset v30, -32
+; CHECK-NEXT:    .cfi_offset v31, -16
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 80
+; CHECK-NEXT:    vmr v29, v3
+; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 96
+; CHECK-NEXT:    vmr v30, v4
+; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    vmr v31, v5
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v29
+; CHECK-NEXT:    mtvsrd v28, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    vmr v2, v30
+; CHECK-NEXT:    xxmrghd v29, vs0, v28
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v31
+; CHECK-NEXT:    mtvsrd v30, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 96
+; CHECK-NEXT:    vmr v2, v29
+; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 80
+; CHECK-NEXT:    xxmrghd v3, vs0, v30
+; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    addi r1, r1, 112
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+;
+; FAST-LABEL: lrint_v4f128:
+; FAST:       # %bb.0:
+; FAST-NEXT:    mflr r0
+; FAST-NEXT:    stdu r1, -112(r1)
+; FAST-NEXT:    std r0, 128(r1)
+; FAST-NEXT:    .cfi_def_cfa_offset 112
+; FAST-NEXT:    .cfi_offset lr, 16
+; FAST-NEXT:    .cfi_offset v28, -64
+; FAST-NEXT:    .cfi_offset v29, -48
+; FAST-NEXT:    .cfi_offset v30, -32
+; FAST-NEXT:    .cfi_offset v31, -16
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    vmr v29, v3
+; FAST-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    vmr v30, v4
+; FAST-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    vmr v31, v5
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v29
+; FAST-NEXT:    mtvsrd v28, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    vmr v2, v30
+; FAST-NEXT:    xxmrghd v29, vs0, v28
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v31
+; FAST-NEXT:    mtvsrd v30, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    vmr v2, v29
+; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    xxmrghd v3, vs0, v30
+; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 112
+; FAST-NEXT:    ld r0, 16(r1)
+; FAST-NEXT:    mtlr r0
+; FAST-NEXT:    blr
+  %a = call <4 x i64> @llvm.lrint.v4i64.v4f128(<4 x fp128> %x)
+  ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.lrint.v4i64.v4f128(<4 x fp128>)
+
+define <8 x i64> @lrint_v8f128(<8 x fp128> %x) {
+; NOTE(review): the BE/CHECK/FAST lines below appear machine-generated
+; (update_llc_test_checks.py style); regenerate rather than hand-editing them.
+; BE-LABEL: lrint_v8f128:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -304(r1)
+; BE-NEXT:    std r0, 320(r1)
+; BE-NEXT:    .cfi_def_cfa_offset 304
+; BE-NEXT:    .cfi_offset lr, 16
+; BE-NEXT:    .cfi_offset v25, -112
+; BE-NEXT:    .cfi_offset v26, -96
+; BE-NEXT:    .cfi_offset v27, -80
+; BE-NEXT:    .cfi_offset v28, -64
+; BE-NEXT:    .cfi_offset v29, -48
+; BE-NEXT:    .cfi_offset v30, -32
+; BE-NEXT:    .cfi_offset v31, -16
+; BE-NEXT:    li r3, 192
+; BE-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 208
+; BE-NEXT:    vmr v25, v2
+; BE-NEXT:    vmr v2, v3
+; BE-NEXT:    stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 224
+; BE-NEXT:    vmr v26, v4
+; BE-NEXT:    stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 240
+; BE-NEXT:    vmr v27, v5
+; BE-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 256
+; BE-NEXT:    vmr v28, v6
+; BE-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 272
+; BE-NEXT:    vmr v29, v7
+; BE-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 288
+; BE-NEXT:    vmr v30, v8
+; BE-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    vmr v31, v9
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v25
+; BE-NEXT:    std r3, 136(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v27
+; BE-NEXT:    std r3, 128(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v26
+; BE-NEXT:    std r3, 152(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v29
+; BE-NEXT:    std r3, 144(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v28
+; BE-NEXT:    std r3, 168(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v31
+; BE-NEXT:    std r3, 160(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v30
+; BE-NEXT:    std r3, 184(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 176(r1)
+; BE-NEXT:    addi r3, r1, 128
+; BE-NEXT:    lxvd2x v2, 0, r3
+; BE-NEXT:    addi r3, r1, 144
+; BE-NEXT:    lxvd2x v3, 0, r3
+; BE-NEXT:    addi r3, r1, 160
+; BE-NEXT:    lxvd2x v4, 0, r3
+; BE-NEXT:    addi r3, r1, 176
+; BE-NEXT:    lxvd2x v5, 0, r3
+; BE-NEXT:    li r3, 288
+; BE-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 272
+; BE-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 256
+; BE-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 240
+; BE-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 224
+; BE-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 208
+; BE-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 192
+; BE-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 304
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+;
+; CHECK-LABEL: lrint_v8f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -176(r1)
+; CHECK-NEXT:    std r0, 192(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 176
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset v24, -128
+; CHECK-NEXT:    .cfi_offset v25, -112
+; CHECK-NEXT:    .cfi_offset v26, -96
+; CHECK-NEXT:    .cfi_offset v27, -80
+; CHECK-NEXT:    .cfi_offset v28, -64
+; CHECK-NEXT:    .cfi_offset v29, -48
+; CHECK-NEXT:    .cfi_offset v30, -32
+; CHECK-NEXT:    .cfi_offset v31, -16
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 80
+; CHECK-NEXT:    vmr v25, v3
+; CHECK-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 96
+; CHECK-NEXT:    vmr v26, v4
+; CHECK-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 112
+; CHECK-NEXT:    vmr v27, v5
+; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 128
+; CHECK-NEXT:    vmr v28, v6
+; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 144
+; CHECK-NEXT:    vmr v29, v7
+; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 160
+; CHECK-NEXT:    vmr v30, v8
+; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    vmr v31, v9
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v25
+; CHECK-NEXT:    mtvsrd v24, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    vmr v2, v26
+; CHECK-NEXT:    xxmrghd v25, vs0, v24
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v27
+; CHECK-NEXT:    mtvsrd v26, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    vmr v2, v28
+; CHECK-NEXT:    xxmrghd v27, vs0, v26
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v29
+; CHECK-NEXT:    mtvsrd v28, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    vmr v2, v30
+; CHECK-NEXT:    xxmrghd v29, vs0, v28
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v31
+; CHECK-NEXT:    mtvsrd v30, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 160
+; CHECK-NEXT:    vmr v4, v29
+; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 144
+; CHECK-NEXT:    vmr v3, v27
+; CHECK-NEXT:    vmr v2, v25
+; CHECK-NEXT:    xxmrghd v5, vs0, v30
+; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 128
+; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 112
+; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 96
+; CHECK-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 80
+; CHECK-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    addi r1, r1, 176
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+;
+; FAST-LABEL: lrint_v8f128:
+; FAST:       # %bb.0:
+; FAST-NEXT:    mflr r0
+; FAST-NEXT:    stdu r1, -176(r1)
+; FAST-NEXT:    std r0, 192(r1)
+; FAST-NEXT:    .cfi_def_cfa_offset 176
+; FAST-NEXT:    .cfi_offset lr, 16
+; FAST-NEXT:    .cfi_offset v24, -128
+; FAST-NEXT:    .cfi_offset v25, -112
+; FAST-NEXT:    .cfi_offset v26, -96
+; FAST-NEXT:    .cfi_offset v27, -80
+; FAST-NEXT:    .cfi_offset v28, -64
+; FAST-NEXT:    .cfi_offset v29, -48
+; FAST-NEXT:    .cfi_offset v30, -32
+; FAST-NEXT:    .cfi_offset v31, -16
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    vmr v25, v3
+; FAST-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    vmr v26, v4
+; FAST-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 112
+; FAST-NEXT:    vmr v27, v5
+; FAST-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 128
+; FAST-NEXT:    vmr v28, v6
+; FAST-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 144
+; FAST-NEXT:    vmr v29, v7
+; FAST-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 160
+; FAST-NEXT:    vmr v30, v8
+; FAST-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    vmr v31, v9
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v25
+; FAST-NEXT:    mtvsrd v24, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    vmr v2, v26
+; FAST-NEXT:    xxmrghd v25, vs0, v24
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v27
+; FAST-NEXT:    mtvsrd v26, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    vmr v2, v28
+; FAST-NEXT:    xxmrghd v27, vs0, v26
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v29
+; FAST-NEXT:    mtvsrd v28, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    vmr v2, v30
+; FAST-NEXT:    xxmrghd v29, vs0, v28
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v31
+; FAST-NEXT:    mtvsrd v30, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 160
+; FAST-NEXT:    vmr v4, v29
+; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 144
+; FAST-NEXT:    vmr v3, v27
+; FAST-NEXT:    vmr v2, v25
+; FAST-NEXT:    xxmrghd v5, vs0, v30
+; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 128
+; FAST-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 112
+; FAST-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 176
+; FAST-NEXT:    ld r0, 16(r1)
+; FAST-NEXT:    mtlr r0
+; FAST-NEXT:    blr
+  %a = call <8 x i64> @llvm.lrint.v8i64.v8f128(<8 x fp128> %x)
+  ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.lrint.v8i64.v8f128(<8 x fp128>)
+
+define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) {
+; NOTE(review): the BE/CHECK/FAST lines below appear machine-generated
+; (update_llc_test_checks.py style); regenerate rather than hand-editing them.
+; BE-LABEL: lrint_v16i64_v16f128:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -496(r1)
+; BE-NEXT:    std r0, 512(r1)
+; BE-NEXT:    .cfi_def_cfa_offset 496
+; BE-NEXT:    .cfi_offset lr, 16
+; BE-NEXT:    .cfi_offset v20, -192
+; BE-NEXT:    .cfi_offset v21, -176
+; BE-NEXT:    .cfi_offset v22, -160
+; BE-NEXT:    .cfi_offset v23, -144
+; BE-NEXT:    .cfi_offset v24, -128
+; BE-NEXT:    .cfi_offset v25, -112
+; BE-NEXT:    .cfi_offset v26, -96
+; BE-NEXT:    .cfi_offset v27, -80
+; BE-NEXT:    .cfi_offset v28, -64
+; BE-NEXT:    .cfi_offset v29, -48
+; BE-NEXT:    .cfi_offset v30, -32
+; BE-NEXT:    .cfi_offset v31, -16
+; BE-NEXT:    li r3, 304
+; BE-NEXT:    stxvd2x v20, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 320
+; BE-NEXT:    stxvd2x v21, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 336
+; BE-NEXT:    vmr v21, v2
+; BE-NEXT:    vmr v2, v3
+; BE-NEXT:    stxvd2x v22, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 352
+; BE-NEXT:    vmr v22, v4
+; BE-NEXT:    stxvd2x v23, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 368
+; BE-NEXT:    vmr v23, v5
+; BE-NEXT:    stxvd2x v24, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 384
+; BE-NEXT:    vmr v24, v6
+; BE-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 400
+; BE-NEXT:    vmr v25, v7
+; BE-NEXT:    stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 416
+; BE-NEXT:    vmr v26, v8
+; BE-NEXT:    stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 432
+; BE-NEXT:    vmr v27, v9
+; BE-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 448
+; BE-NEXT:    vmr v28, v11
+; BE-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 464
+; BE-NEXT:    vmr v29, v10
+; BE-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 480
+; BE-NEXT:    vmr v30, v13
+; BE-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    li r3, 128
+; BE-NEXT:    stxvd2x v12, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    addi r3, r1, 768
+; BE-NEXT:    lxvw4x vs0, 0, r3
+; BE-NEXT:    li r3, 160
+; BE-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    addi r3, r1, 784
+; BE-NEXT:    lxvw4x vs0, 0, r3
+; BE-NEXT:    li r3, 144
+; BE-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    addi r3, r1, 736
+; BE-NEXT:    lxvw4x v20, 0, r3
+; BE-NEXT:    addi r3, r1, 752
+; BE-NEXT:    lxvw4x v31, 0, r3
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v21
+; BE-NEXT:    std r3, 184(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v23
+; BE-NEXT:    std r3, 176(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v22
+; BE-NEXT:    std r3, 200(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v25
+; BE-NEXT:    std r3, 192(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v24
+; BE-NEXT:    std r3, 216(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v27
+; BE-NEXT:    std r3, 208(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v26
+; BE-NEXT:    std r3, 232(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v28
+; BE-NEXT:    std r3, 224(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v29
+; BE-NEXT:    std r3, 248(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v30
+; BE-NEXT:    std r3, 240(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 264(r1)
+; BE-NEXT:    li r3, 128
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v31
+; BE-NEXT:    std r3, 256(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v20
+; BE-NEXT:    std r3, 280(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 272(r1)
+; BE-NEXT:    li r3, 144
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 296(r1)
+; BE-NEXT:    li r3, 160
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 288(r1)
+; BE-NEXT:    addi r3, r1, 176
+; BE-NEXT:    lxvd2x v2, 0, r3
+; BE-NEXT:    addi r3, r1, 192
+; BE-NEXT:    lxvd2x v3, 0, r3
+; BE-NEXT:    addi r3, r1, 208
+; BE-NEXT:    lxvd2x v4, 0, r3
+; BE-NEXT:    addi r3, r1, 224
+; BE-NEXT:    lxvd2x v5, 0, r3
+; BE-NEXT:    addi r3, r1, 240
+; BE-NEXT:    lxvd2x v6, 0, r3
+; BE-NEXT:    addi r3, r1, 256
+; BE-NEXT:    lxvd2x v7, 0, r3
+; BE-NEXT:    addi r3, r1, 272
+; BE-NEXT:    lxvd2x v8, 0, r3
+; BE-NEXT:    addi r3, r1, 288
+; BE-NEXT:    lxvd2x v9, 0, r3
+; BE-NEXT:    li r3, 480
+; BE-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 464
+; BE-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 448
+; BE-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 432
+; BE-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 416
+; BE-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 400
+; BE-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 384
+; BE-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 368
+; BE-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 352
+; BE-NEXT:    lxvd2x v23, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 336
+; BE-NEXT:    lxvd2x v22, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 320
+; BE-NEXT:    lxvd2x v21, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 304
+; BE-NEXT:    lxvd2x v20, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 496
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+;
+; CHECK-LABEL: lrint_v16i64_v16f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -304(r1)
+; CHECK-NEXT:    std r0, 320(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 304
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset v20, -192
+; CHECK-NEXT:    .cfi_offset v21, -176
+; CHECK-NEXT:    .cfi_offset v22, -160
+; CHECK-NEXT:    .cfi_offset v23, -144
+; CHECK-NEXT:    .cfi_offset v24, -128
+; CHECK-NEXT:    .cfi_offset v25, -112
+; CHECK-NEXT:    .cfi_offset v26, -96
+; CHECK-NEXT:    .cfi_offset v27, -80
+; CHECK-NEXT:    .cfi_offset v28, -64
+; CHECK-NEXT:    .cfi_offset v29, -48
+; CHECK-NEXT:    .cfi_offset v30, -32
+; CHECK-NEXT:    .cfi_offset v31, -16
+; CHECK-NEXT:    li r3, 112
+; CHECK-NEXT:    stvx v20, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 128
+; CHECK-NEXT:    stvx v21, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 144
+; CHECK-NEXT:    vmr v21, v4
+; CHECK-NEXT:    stvx v22, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 160
+; CHECK-NEXT:    vmr v22, v6
+; CHECK-NEXT:    stvx v23, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 176
+; CHECK-NEXT:    vmr v23, v8
+; CHECK-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 192
+; CHECK-NEXT:    vmr v24, v9
+; CHECK-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 208
+; CHECK-NEXT:    vmr v25, v7
+; CHECK-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 224
+; CHECK-NEXT:    vmr v26, v10
+; CHECK-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 240
+; CHECK-NEXT:    vmr v27, v5
+; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 256
+; CHECK-NEXT:    vmr v28, v11
+; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 272
+; CHECK-NEXT:    vmr v29, v12
+; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 288
+; CHECK-NEXT:    vmr v30, v3
+; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    stxvd2x v13, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    addi r3, r1, 576
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    addi r3, r1, 560
+; CHECK-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-NEXT:    addi r3, r1, 544
+; CHECK-NEXT:    lxvd2x vs2, 0, r3
+; CHECK-NEXT:    li r3, 96
+; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 80
+; CHECK-NEXT:    xxswapd vs0, vs1
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    xxswapd vs0, vs2
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    addi r3, r1, 528
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    xxswapd v31, vs0
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v30
+; CHECK-NEXT:    mtvsrd v20, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    vmr v2, v21
+; CHECK-NEXT:    xxmrghd v30, vs0, v20
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v27
+; CHECK-NEXT:    mtvsrd v21, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    vmr v2, v22
+; CHECK-NEXT:    xxmrghd v27, vs0, v21
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v25
+; CHECK-NEXT:    mtvsrd v22, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    vmr v2, v23
+; CHECK-NEXT:    xxmrghd v25, vs0, v22
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v24
+; CHECK-NEXT:    mtvsrd v23, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    vmr v2, v26
+; CHECK-NEXT:    xxmrghd v24, vs0, v23
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v28
+; CHECK-NEXT:    mtvsrd v26, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    vmr v2, v29
+; CHECK-NEXT:    xxmrghd v28, vs0, v26
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v29, r3
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    vmr v2, v31
+; CHECK-NEXT:    xxmrghd v29, vs0, v29
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v31, r3
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 80
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v31, vs0, v31
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v26, r3
+; CHECK-NEXT:    li r3, 96
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 288
+; CHECK-NEXT:    vmr v8, v31
+; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 272
+; CHECK-NEXT:    vmr v2, v30
+; CHECK-NEXT:    vmr v7, v29
+; CHECK-NEXT:    vmr v6, v28
+; CHECK-NEXT:    vmr v3, v27
+; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 256
+; CHECK-NEXT:    vmr v4, v25
+; CHECK-NEXT:    vmr v5, v24
+; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 240
+; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 224
+; CHECK-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 208
+; CHECK-NEXT:    xxmrghd v9, vs0, v26
+; CHECK-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 192
+; CHECK-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 176
+; CHECK-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 160
+; CHECK-NEXT:    lvx v23, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 144
+; CHECK-NEXT:    lvx v22, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 128
+; CHECK-NEXT:    lvx v21, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 112
+; CHECK-NEXT:    lvx v20, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    addi r1, r1, 304
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+;
+; FAST-LABEL: lrint_v16i64_v16f128:
+; FAST:       # %bb.0:
+; FAST-NEXT:    mflr r0
+; FAST-NEXT:    stdu r1, -304(r1)
+; FAST-NEXT:    std r0, 320(r1)
+; FAST-NEXT:    .cfi_def_cfa_offset 304
+; FAST-NEXT:    .cfi_offset lr, 16
+; FAST-NEXT:    .cfi_offset v20, -192
+; FAST-NEXT:    .cfi_offset v21, -176
+; FAST-NEXT:    .cfi_offset v22, -160
+; FAST-NEXT:    .cfi_offset v23, -144
+; FAST-NEXT:    .cfi_offset v24, -128
+; FAST-NEXT:    .cfi_offset v25, -112
+; FAST-NEXT:    .cfi_offset v26, -96
+; FAST-NEXT:    .cfi_offset v27, -80
+; FAST-NEXT:    .cfi_offset v28, -64
+; FAST-NEXT:    .cfi_offset v29, -48
+; FAST-NEXT:    .cfi_offset v30, -32
+; FAST-NEXT:    .cfi_offset v31, -16
+; FAST-NEXT:    li r3, 112
+; FAST-NEXT:    stvx v20, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 128
+; FAST-NEXT:    stvx v21, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 144
+; FAST-NEXT:    vmr v21, v4
+; FAST-NEXT:    stvx v22, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 160
+; FAST-NEXT:    vmr v22, v6
+; FAST-NEXT:    stvx v23, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 176
+; FAST-NEXT:    vmr v23, v8
+; FAST-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 192
+; FAST-NEXT:    vmr v24, v9
+; FAST-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 208
+; FAST-NEXT:    vmr v25, v7
+; FAST-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 224
+; FAST-NEXT:    vmr v26, v10
+; FAST-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 240
+; FAST-NEXT:    vmr v27, v5
+; FAST-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 256
+; FAST-NEXT:    vmr v28, v11
+; FAST-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 272
+; FAST-NEXT:    vmr v29, v12
+; FAST-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 288
+; FAST-NEXT:    vmr v30, v3
+; FAST-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    stxvd2x v13, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    addi r3, r1, 576
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    addi r3, r1, 560
+; FAST-NEXT:    lxvd2x vs1, 0, r3
+; FAST-NEXT:    addi r3, r1, 544
+; FAST-NEXT:    lxvd2x vs2, 0, r3
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    xxswapd vs0, vs0
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    xxswapd vs0, vs1
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    xxswapd vs0, vs2
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    addi r3, r1, 528
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    xxswapd v31, vs0
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v30
+; FAST-NEXT:    mtvsrd v20, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    vmr v2, v21
+; FAST-NEXT:    xxmrghd v30, vs0, v20
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v27
+; FAST-NEXT:    mtvsrd v21, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    vmr v2, v22
+; FAST-NEXT:    xxmrghd v27, vs0, v21
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v25
+; FAST-NEXT:    mtvsrd v22, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    vmr v2, v23
+; FAST-NEXT:    xxmrghd v25, vs0, v22
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v24
+; FAST-NEXT:    mtvsrd v23, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    vmr v2, v26
+; FAST-NEXT:    xxmrghd v24, vs0, v23
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v28
+; FAST-NEXT:    mtvsrd v26, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    vmr v2, v29
+; FAST-NEXT:    xxmrghd v28, vs0, v26
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtvsrd v29, r3
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    vmr v2, v31
+; FAST-NEXT:    xxmrghd v29, vs0, v29
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    xxmrghd v31, vs0, v31
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtvsrd v26, r3
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 288
+; FAST-NEXT:    vmr v8, v31
+; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 272
+; FAST-NEXT:    vmr v2, v30
+; FAST-NEXT:    vmr v7, v29
+; FAST-NEXT:    vmr v6, v28
+; FAST-NEXT:    vmr v3, v27
+; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 256
+; FAST-NEXT:    vmr v4, v25
+; FAST-NEXT:    vmr v5, v24
+; FAST-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 240
+; FAST-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 224
+; FAST-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 208
+; FAST-NEXT:    xxmrghd v9, vs0, v26
+; FAST-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 192
+; FAST-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 176
+; FAST-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 160
+; FAST-NEXT:    lvx v23, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 144
+; FAST-NEXT:    lvx v22, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 128
+; FAST-NEXT:    lvx v21, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 112
+; FAST-NEXT:    lvx v20, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 304
+; FAST-NEXT:    ld r0, 16(r1)
+; FAST-NEXT:    mtlr r0
+; FAST-NEXT:    blr
+  %a = call <16 x i64> @llvm.lrint.v16i64.v16f128(<16 x fp128> %x)
+  ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.lrint.v16i64.v16f128(<16 x fp128>)
+
+define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) {
+; BE-LABEL: lrint_v32i64_v32f128:
+; BE:       # %bb.0:
+; BE-NEXT:    mflr r0
+; BE-NEXT:    stdu r1, -896(r1)
+; BE-NEXT:    std r0, 912(r1)
+; BE-NEXT:    .cfi_def_cfa_offset 896
+; BE-NEXT:    .cfi_offset lr, 16
+; BE-NEXT:    .cfi_offset r30, -16
+; BE-NEXT:    .cfi_offset v20, -208
+; BE-NEXT:    .cfi_offset v21, -192
+; BE-NEXT:    .cfi_offset v22, -176
+; BE-NEXT:    .cfi_offset v23, -160
+; BE-NEXT:    .cfi_offset v24, -144
+; BE-NEXT:    .cfi_offset v25, -128
+; BE-NEXT:    .cfi_offset v26, -112
+; BE-NEXT:    .cfi_offset v27, -96
+; BE-NEXT:    .cfi_offset v28, -80
+; BE-NEXT:    .cfi_offset v29, -64
+; BE-NEXT:    .cfi_offset v30, -48
+; BE-NEXT:    .cfi_offset v31, -32
+; BE-NEXT:    std r30, 880(r1) # 8-byte Folded Spill
+; BE-NEXT:    mr r30, r3
+; BE-NEXT:    addi r3, r1, 1440
+; BE-NEXT:    li r4, 688
+; BE-NEXT:    lxvw4x vs0, 0, r3
+; BE-NEXT:    li r3, 256
+; BE-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 704
+; BE-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 720
+; BE-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    addi r3, r1, 1456
+; BE-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 736
+; BE-NEXT:    lxvw4x vs0, 0, r3
+; BE-NEXT:    li r3, 240
+; BE-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 752
+; BE-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    addi r3, r1, 1408
+; BE-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 768
+; BE-NEXT:    lxvw4x vs0, 0, r3
+; BE-NEXT:    li r3, 224
+; BE-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 784
+; BE-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 800
+; BE-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    addi r3, r1, 1424
+; BE-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 816
+; BE-NEXT:    lxvw4x vs0, 0, r3
+; BE-NEXT:    li r3, 208
+; BE-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 832
+; BE-NEXT:    vmr v28, v2
+; BE-NEXT:    vmr v2, v3
+; BE-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    addi r3, r1, 1376
+; BE-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 848
+; BE-NEXT:    lxvw4x vs0, 0, r3
+; BE-NEXT:    li r3, 192
+; BE-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 864
+; BE-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 400
+; BE-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    addi r3, r1, 1392
+; BE-NEXT:    stxvd2x v13, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 416
+; BE-NEXT:    lxvw4x vs0, 0, r3
+; BE-NEXT:    li r3, 176
+; BE-NEXT:    stxvd2x v12, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 368
+; BE-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    addi r3, r1, 1344
+; BE-NEXT:    stxvd2x v11, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 384
+; BE-NEXT:    lxvw4x vs0, 0, r3
+; BE-NEXT:    li r3, 160
+; BE-NEXT:    stxvd2x v10, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 336
+; BE-NEXT:    stxvd2x v9, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 352
+; BE-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    addi r3, r1, 1360
+; BE-NEXT:    stxvd2x v8, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 304
+; BE-NEXT:    lxvw4x vs0, 0, r3
+; BE-NEXT:    li r3, 144
+; BE-NEXT:    stxvd2x v7, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 320
+; BE-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    addi r3, r1, 1312
+; BE-NEXT:    stxvd2x v6, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 272
+; BE-NEXT:    lxvw4x vs0, 0, r3
+; BE-NEXT:    li r3, 128
+; BE-NEXT:    stxvd2x v5, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    li r4, 288
+; BE-NEXT:    stxvd2x v4, r1, r4 # 16-byte Folded Spill
+; BE-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; BE-NEXT:    addi r3, r1, 1328
+; BE-NEXT:    lxvw4x v23, 0, r3
+; BE-NEXT:    addi r3, r1, 1280
+; BE-NEXT:    lxvw4x v22, 0, r3
+; BE-NEXT:    addi r3, r1, 1296
+; BE-NEXT:    lxvw4x v21, 0, r3
+; BE-NEXT:    addi r3, r1, 1248
+; BE-NEXT:    lxvw4x v20, 0, r3
+; BE-NEXT:    addi r3, r1, 1264
+; BE-NEXT:    lxvw4x v31, 0, r3
+; BE-NEXT:    addi r3, r1, 1216
+; BE-NEXT:    lxvw4x v30, 0, r3
+; BE-NEXT:    addi r3, r1, 1232
+; BE-NEXT:    lxvw4x v29, 0, r3
+; BE-NEXT:    addi r3, r1, 1184
+; BE-NEXT:    lxvw4x v27, 0, r3
+; BE-NEXT:    addi r3, r1, 1200
+; BE-NEXT:    lxvw4x v26, 0, r3
+; BE-NEXT:    addi r3, r1, 1152
+; BE-NEXT:    lxvw4x v25, 0, r3
+; BE-NEXT:    addi r3, r1, 1168
+; BE-NEXT:    lxvw4x v24, 0, r3
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v28
+; BE-NEXT:    std r3, 440(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v24
+; BE-NEXT:    std r3, 432(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v25
+; BE-NEXT:    std r3, 536(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v26
+; BE-NEXT:    std r3, 528(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v27
+; BE-NEXT:    std r3, 552(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v29
+; BE-NEXT:    std r3, 544(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v30
+; BE-NEXT:    std r3, 568(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v31
+; BE-NEXT:    std r3, 560(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v20
+; BE-NEXT:    std r3, 584(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v21
+; BE-NEXT:    std r3, 576(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v22
+; BE-NEXT:    std r3, 600(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    vmr v2, v23
+; BE-NEXT:    std r3, 592(r1)
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 616(r1)
+; BE-NEXT:    li r3, 128
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 608(r1)
+; BE-NEXT:    li r3, 144
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 632(r1)
+; BE-NEXT:    li r3, 160
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 624(r1)
+; BE-NEXT:    li r3, 176
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 648(r1)
+; BE-NEXT:    li r3, 192
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 640(r1)
+; BE-NEXT:    li r3, 208
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 664(r1)
+; BE-NEXT:    li r3, 224
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 656(r1)
+; BE-NEXT:    li r3, 240
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 680(r1)
+; BE-NEXT:    li r3, 256
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 672(r1)
+; BE-NEXT:    li r3, 272
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 456(r1)
+; BE-NEXT:    li r3, 288
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 448(r1)
+; BE-NEXT:    li r3, 304
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 472(r1)
+; BE-NEXT:    li r3, 320
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 464(r1)
+; BE-NEXT:    li r3, 336
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 488(r1)
+; BE-NEXT:    li r3, 352
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 480(r1)
+; BE-NEXT:    li r3, 368
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 504(r1)
+; BE-NEXT:    li r3, 384
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 496(r1)
+; BE-NEXT:    li r3, 400
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 520(r1)
+; BE-NEXT:    li r3, 416
+; BE-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    bl lrintf128
+; BE-NEXT:    nop
+; BE-NEXT:    std r3, 512(r1)
+; BE-NEXT:    addi r3, r1, 432
+; BE-NEXT:    lxvd2x vs0, 0, r3
+; BE-NEXT:    addi r3, r1, 528
+; BE-NEXT:    lxvd2x vs1, 0, r3
+; BE-NEXT:    addi r3, r1, 544
+; BE-NEXT:    lxvd2x vs2, 0, r3
+; BE-NEXT:    addi r3, r1, 560
+; BE-NEXT:    lxvd2x vs3, 0, r3
+; BE-NEXT:    addi r3, r1, 576
+; BE-NEXT:    lxvd2x vs4, 0, r3
+; BE-NEXT:    addi r3, r1, 592
+; BE-NEXT:    lxvd2x vs5, 0, r3
+; BE-NEXT:    addi r3, r1, 608
+; BE-NEXT:    lxvd2x vs6, 0, r3
+; BE-NEXT:    addi r3, r1, 624
+; BE-NEXT:    lxvd2x vs7, 0, r3
+; BE-NEXT:    addi r3, r1, 640
+; BE-NEXT:    lxvd2x vs8, 0, r3
+; BE-NEXT:    addi r3, r1, 656
+; BE-NEXT:    lxvd2x vs9, 0, r3
+; BE-NEXT:    addi r3, r1, 672
+; BE-NEXT:    lxvd2x vs10, 0, r3
+; BE-NEXT:    addi r3, r1, 448
+; BE-NEXT:    lxvd2x vs11, 0, r3
+; BE-NEXT:    addi r3, r1, 464
+; BE-NEXT:    lxvd2x vs12, 0, r3
+; BE-NEXT:    addi r3, r1, 480
+; BE-NEXT:    lxvd2x vs13, 0, r3
+; BE-NEXT:    addi r3, r1, 496
+; BE-NEXT:    lxvd2x v2, 0, r3
+; BE-NEXT:    addi r3, r1, 512
+; BE-NEXT:    lxvd2x v3, 0, r3
+; BE-NEXT:    li r3, 80
+; BE-NEXT:    stxvd2x v3, r30, r3
+; BE-NEXT:    li r3, 64
+; BE-NEXT:    stxvd2x v2, r30, r3
+; BE-NEXT:    li r3, 48
+; BE-NEXT:    stxvd2x vs13, r30, r3
+; BE-NEXT:    li r3, 32
+; BE-NEXT:    stxvd2x vs12, r30, r3
+; BE-NEXT:    li r3, 16
+; BE-NEXT:    stxvd2x vs11, r30, r3
+; BE-NEXT:    li r3, 240
+; BE-NEXT:    stxvd2x vs10, r30, r3
+; BE-NEXT:    li r3, 224
+; BE-NEXT:    stxvd2x vs9, r30, r3
+; BE-NEXT:    li r3, 208
+; BE-NEXT:    stxvd2x vs8, r30, r3
+; BE-NEXT:    li r3, 192
+; BE-NEXT:    stxvd2x vs7, r30, r3
+; BE-NEXT:    li r3, 176
+; BE-NEXT:    stxvd2x vs6, r30, r3
+; BE-NEXT:    li r3, 160
+; BE-NEXT:    stxvd2x vs5, r30, r3
+; BE-NEXT:    li r3, 144
+; BE-NEXT:    stxvd2x vs4, r30, r3
+; BE-NEXT:    li r3, 128
+; BE-NEXT:    stxvd2x vs3, r30, r3
+; BE-NEXT:    li r3, 112
+; BE-NEXT:    stxvd2x vs2, r30, r3
+; BE-NEXT:    li r3, 96
+; BE-NEXT:    stxvd2x vs1, r30, r3
+; BE-NEXT:    li r3, 864
+; BE-NEXT:    stxvd2x vs0, 0, r30
+; BE-NEXT:    ld r30, 880(r1) # 8-byte Folded Reload
+; BE-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 848
+; BE-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 832
+; BE-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 816
+; BE-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 800
+; BE-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 784
+; BE-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 768
+; BE-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 752
+; BE-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 736
+; BE-NEXT:    lxvd2x v23, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 720
+; BE-NEXT:    lxvd2x v22, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 704
+; BE-NEXT:    lxvd2x v21, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    li r3, 688
+; BE-NEXT:    lxvd2x v20, r1, r3 # 16-byte Folded Reload
+; BE-NEXT:    addi r1, r1, 896
+; BE-NEXT:    ld r0, 16(r1)
+; BE-NEXT:    mtlr r0
+; BE-NEXT:    blr
+;
+; CHECK-LABEL: lrint_v32i64_v32f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -640(r1)
+; CHECK-NEXT:    std r0, 656(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 640
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    .cfi_offset r30, -16
+; CHECK-NEXT:    .cfi_offset v20, -208
+; CHECK-NEXT:    .cfi_offset v21, -192
+; CHECK-NEXT:    .cfi_offset v22, -176
+; CHECK-NEXT:    .cfi_offset v23, -160
+; CHECK-NEXT:    .cfi_offset v24, -144
+; CHECK-NEXT:    .cfi_offset v25, -128
+; CHECK-NEXT:    .cfi_offset v26, -112
+; CHECK-NEXT:    .cfi_offset v27, -96
+; CHECK-NEXT:    .cfi_offset v28, -80
+; CHECK-NEXT:    .cfi_offset v29, -64
+; CHECK-NEXT:    .cfi_offset v30, -48
+; CHECK-NEXT:    .cfi_offset v31, -32
+; CHECK-NEXT:    li r4, 432
+; CHECK-NEXT:    std r30, 624(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr r30, r3
+; CHECK-NEXT:    addi r3, r1, 1184
+; CHECK-NEXT:    stvx v20, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 448
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    addi r3, r1, 1168
+; CHECK-NEXT:    stvx v21, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 464
+; CHECK-NEXT:    lxvd2x vs1, 0, r3
+; CHECK-NEXT:    addi r3, r1, 1152
+; CHECK-NEXT:    stvx v22, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 480
+; CHECK-NEXT:    lxvd2x vs2, 0, r3
+; CHECK-NEXT:    addi r3, r1, 1136
+; CHECK-NEXT:    stvx v23, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 496
+; CHECK-NEXT:    lxvd2x vs3, 0, r3
+; CHECK-NEXT:    addi r3, r1, 1120
+; CHECK-NEXT:    stvx v24, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 512
+; CHECK-NEXT:    lxvd2x vs4, 0, r3
+; CHECK-NEXT:    addi r3, r1, 1104
+; CHECK-NEXT:    vmr v24, v3
+; CHECK-NEXT:    stvx v25, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 528
+; CHECK-NEXT:    lxvd2x vs5, 0, r3
+; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    addi r3, r1, 1088
+; CHECK-NEXT:    stvx v26, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 544
+; CHECK-NEXT:    stvx v27, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 560
+; CHECK-NEXT:    stvx v28, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 576
+; CHECK-NEXT:    stvx v29, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 592
+; CHECK-NEXT:    stvx v30, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 608
+; CHECK-NEXT:    stvx v31, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 416
+; CHECK-NEXT:    stxvd2x v13, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 400
+; CHECK-NEXT:    stxvd2x v12, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 384
+; CHECK-NEXT:    stxvd2x v11, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 352
+; CHECK-NEXT:    stxvd2x v10, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 336
+; CHECK-NEXT:    stxvd2x v9, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 304
+; CHECK-NEXT:    stxvd2x v8, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 288
+; CHECK-NEXT:    stxvd2x v7, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 256
+; CHECK-NEXT:    stxvd2x v6, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 240
+; CHECK-NEXT:    stxvd2x v5, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 224
+; CHECK-NEXT:    stxvd2x v4, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 192
+; CHECK-NEXT:    stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 176
+; CHECK-NEXT:    xxswapd vs0, vs1
+; CHECK-NEXT:    stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 160
+; CHECK-NEXT:    xxswapd vs0, vs2
+; CHECK-NEXT:    stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 144
+; CHECK-NEXT:    xxswapd vs0, vs3
+; CHECK-NEXT:    stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 128
+; CHECK-NEXT:    xxswapd vs0, vs4
+; CHECK-NEXT:    stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    li r4, 112
+; CHECK-NEXT:    xxswapd vs0, vs5
+; CHECK-NEXT:    stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    li r3, 96
+; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    addi r3, r1, 1072
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    li r3, 80
+; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    addi r3, r1, 1056
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    addi r3, r1, 1040
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    addi r3, r1, 1024
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    addi r3, r1, 1008
+; CHECK-NEXT:    xxswapd v22, vs0
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    addi r3, r1, 992
+; CHECK-NEXT:    xxswapd v21, vs0
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    addi r3, r1, 976
+; CHECK-NEXT:    xxswapd v20, vs0
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    addi r3, r1, 960
+; CHECK-NEXT:    xxswapd v31, vs0
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    addi r3, r1, 944
+; CHECK-NEXT:    xxswapd v30, vs0
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    addi r3, r1, 928
+; CHECK-NEXT:    xxswapd v29, vs0
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    addi r3, r1, 912
+; CHECK-NEXT:    xxswapd v28, vs0
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    addi r3, r1, 896
+; CHECK-NEXT:    xxswapd v27, vs0
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    addi r3, r1, 880
+; CHECK-NEXT:    xxswapd v26, vs0
+; CHECK-NEXT:    lxvd2x vs0, 0, r3
+; CHECK-NEXT:    xxswapd v25, vs0
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v24
+; CHECK-NEXT:    mtvsrd v23, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 368
+; CHECK-NEXT:    vmr v2, v25
+; CHECK-NEXT:    xxmrghd vs0, vs0, v23
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v26
+; CHECK-NEXT:    mtvsrd v25, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 320
+; CHECK-NEXT:    vmr v2, v27
+; CHECK-NEXT:    xxmrghd vs0, vs0, v25
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v28
+; CHECK-NEXT:    mtvsrd v27, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 272
+; CHECK-NEXT:    vmr v2, v29
+; CHECK-NEXT:    xxmrghd vs0, vs0, v27
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v30
+; CHECK-NEXT:    mtvsrd v29, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 208
+; CHECK-NEXT:    vmr v2, v31
+; CHECK-NEXT:    xxmrghd vs0, vs0, v29
+; CHECK-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v20
+; CHECK-NEXT:    mtvsrd v31, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    vmr v2, v21
+; CHECK-NEXT:    xxmrghd v31, vs0, v31
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    vmr v2, v22
+; CHECK-NEXT:    mtvsrd v29, r3
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v29, vs0, v29
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v27, r3
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 80
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v27, vs0, v27
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v25, r3
+; CHECK-NEXT:    li r3, 96
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 112
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v25, vs0, v25
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v23, r3
+; CHECK-NEXT:    li r3, 128
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 144
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v23, vs0, v23
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v22, r3
+; CHECK-NEXT:    li r3, 160
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 176
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v22, vs0, v22
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v21, r3
+; CHECK-NEXT:    li r3, 192
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 224
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v21, vs0, v21
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v20, r3
+; CHECK-NEXT:    li r3, 240
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 256
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v20, vs0, v20
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v24, r3
+; CHECK-NEXT:    li r3, 288
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 304
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v24, vs0, v24
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v26, r3
+; CHECK-NEXT:    li r3, 336
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 352
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v26, vs0, v26
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v28, r3
+; CHECK-NEXT:    li r3, 384
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 400
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    xxmrghd v28, vs0, v28
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtvsrd v30, r3
+; CHECK-NEXT:    li r3, 416
+; CHECK-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    bl lrintf128
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    mtfprd f0, r3
+; CHECK-NEXT:    li r3, 80
+; CHECK-NEXT:    xxswapd vs1, v28
+; CHECK-NEXT:    li r4, 208
+; CHECK-NEXT:    xxswapd vs2, v26
+; CHECK-NEXT:    xxswapd vs3, v27
+; CHECK-NEXT:    xxmrghd v2, vs0, v30
+; CHECK-NEXT:    xxswapd vs0, v2
+; CHECK-NEXT:    stxvd2x vs0, r30, r3
+; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    stxvd2x vs1, r30, r3
+; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    stxvd2x vs2, r30, r3
+; CHECK-NEXT:    li r3, 32
+; CHECK-NEXT:    xxswapd vs0, v24
+; CHECK-NEXT:    stxvd2x vs0, r30, r3
+; CHECK-NEXT:    li r3, 16
+; CHECK-NEXT:    xxswapd vs1, v20
+; CHECK-NEXT:    stxvd2x vs1, r30, r3
+; CHECK-NEXT:    li r3, 240
+; CHECK-NEXT:    xxswapd vs2, v23
+; CHECK-NEXT:    xxswapd vs0, v21
+; CHECK-NEXT:    stxvd2x vs0, r30, r3
+; CHECK-NEXT:    li r3, 224
+; CHECK-NEXT:    xxswapd vs1, v22
+; CHECK-NEXT:    stxvd2x vs1, r30, r3
+; CHECK-NEXT:    li r3, 208
+; CHECK-NEXT:    stxvd2x vs2, r30, r3
+; CHECK-NEXT:    li r3, 192
+; CHECK-NEXT:    xxswapd vs0, v25
+; CHECK-NEXT:    stxvd2x vs0, r30, r3
+; CHECK-NEXT:    li r3, 176
+; CHECK-NEXT:    stxvd2x vs3, r30, r3
+; CHECK-NEXT:    li r3, 160
+; CHECK-NEXT:    lxvd2x vs2, r1, r4 # 16-byte Folded Reload
+; CHECK-NEXT:    li r4, 272
+; CHECK-NEXT:    xxswapd vs1, v29
+; CHECK-NEXT:    stxvd2x vs1, r30, r3
+; CHECK-NEXT:    li r3, 144
+; CHECK-NEXT:    lxvd2x vs1, r1, r4 # 16-byte Folded Reload
+; CHECK-NEXT:    li r4, 320
+; CHECK-NEXT:    lxvd2x vs3, r1, r4 # 16-byte Folded Reload
+; CHECK-NEXT:    li r4, 368
+; CHECK-NEXT:    lxvd2x vs4, r1, r4 # 16-byte Folded Reload
+; CHECK-NEXT:    xxswapd vs0, v31
+; CHECK-NEXT:    stxvd2x vs0, r30, r3
+; CHECK-NEXT:    li r3, 128
+; CHECK-NEXT:    xxswapd vs2, vs2
+; CHECK-NEXT:    stxvd2x vs2, r30, r3
+; CHECK-NEXT:    li r3, 112
+; CHECK-NEXT:    xxswapd vs1, vs1
+; CHECK-NEXT:    stxvd2x vs1, r30, r3
+; CHECK-NEXT:    li r3, 96
+; CHECK-NEXT:    xxswapd vs3, vs3
+; CHECK-NEXT:    stxvd2x vs3, r30, r3
+; CHECK-NEXT:    li r3, 608
+; CHECK-NEXT:    xxswapd vs4, vs4
+; CHECK-NEXT:    stxvd2x vs4, 0, r30
+; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 592
+; CHECK-NEXT:    ld r30, 624(r1) # 8-byte Folded Reload
+; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 576
+; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 560
+; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 544
+; CHECK-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 528
+; CHECK-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 512
+; CHECK-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 496
+; CHECK-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 480
+; CHECK-NEXT:    lvx v23, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 464
+; CHECK-NEXT:    lvx v22, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 448
+; CHECK-NEXT:    lvx v21, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    li r3, 432
+; CHECK-NEXT:    lvx v20, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    addi r1, r1, 640
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+;
+; FAST-LABEL: lrint_v32i64_v32f128:
+; FAST:       # %bb.0:
+; FAST-NEXT:    mflr r0
+; FAST-NEXT:    stdu r1, -640(r1)
+; FAST-NEXT:    std r0, 656(r1)
+; FAST-NEXT:    .cfi_def_cfa_offset 640
+; FAST-NEXT:    .cfi_offset lr, 16
+; FAST-NEXT:    .cfi_offset r30, -16
+; FAST-NEXT:    .cfi_offset v20, -208
+; FAST-NEXT:    .cfi_offset v21, -192
+; FAST-NEXT:    .cfi_offset v22, -176
+; FAST-NEXT:    .cfi_offset v23, -160
+; FAST-NEXT:    .cfi_offset v24, -144
+; FAST-NEXT:    .cfi_offset v25, -128
+; FAST-NEXT:    .cfi_offset v26, -112
+; FAST-NEXT:    .cfi_offset v27, -96
+; FAST-NEXT:    .cfi_offset v28, -80
+; FAST-NEXT:    .cfi_offset v29, -64
+; FAST-NEXT:    .cfi_offset v30, -48
+; FAST-NEXT:    .cfi_offset v31, -32
+; FAST-NEXT:    li r4, 432
+; FAST-NEXT:    std r30, 624(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r30, r3
+; FAST-NEXT:    addi r3, r1, 1184
+; FAST-NEXT:    stvx v20, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 448
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    addi r3, r1, 1168
+; FAST-NEXT:    stvx v21, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 464
+; FAST-NEXT:    lxvd2x vs1, 0, r3
+; FAST-NEXT:    addi r3, r1, 1152
+; FAST-NEXT:    stvx v22, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 480
+; FAST-NEXT:    lxvd2x vs2, 0, r3
+; FAST-NEXT:    addi r3, r1, 1136
+; FAST-NEXT:    stvx v23, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 496
+; FAST-NEXT:    lxvd2x vs3, 0, r3
+; FAST-NEXT:    addi r3, r1, 1120
+; FAST-NEXT:    stvx v24, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 512
+; FAST-NEXT:    lxvd2x vs4, 0, r3
+; FAST-NEXT:    addi r3, r1, 1104
+; FAST-NEXT:    vmr v24, v3
+; FAST-NEXT:    stvx v25, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 528
+; FAST-NEXT:    lxvd2x vs5, 0, r3
+; FAST-NEXT:    xxswapd vs0, vs0
+; FAST-NEXT:    addi r3, r1, 1088
+; FAST-NEXT:    stvx v26, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 544
+; FAST-NEXT:    stvx v27, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 560
+; FAST-NEXT:    stvx v28, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 576
+; FAST-NEXT:    stvx v29, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 592
+; FAST-NEXT:    stvx v30, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 608
+; FAST-NEXT:    stvx v31, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 416
+; FAST-NEXT:    stxvd2x v13, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 400
+; FAST-NEXT:    stxvd2x v12, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 384
+; FAST-NEXT:    stxvd2x v11, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 352
+; FAST-NEXT:    stxvd2x v10, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 336
+; FAST-NEXT:    stxvd2x v9, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 304
+; FAST-NEXT:    stxvd2x v8, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 288
+; FAST-NEXT:    stxvd2x v7, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 256
+; FAST-NEXT:    stxvd2x v6, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 240
+; FAST-NEXT:    stxvd2x v5, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 224
+; FAST-NEXT:    stxvd2x v4, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 192
+; FAST-NEXT:    stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 176
+; FAST-NEXT:    xxswapd vs0, vs1
+; FAST-NEXT:    stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 160
+; FAST-NEXT:    xxswapd vs0, vs2
+; FAST-NEXT:    stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 144
+; FAST-NEXT:    xxswapd vs0, vs3
+; FAST-NEXT:    stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 128
+; FAST-NEXT:    xxswapd vs0, vs4
+; FAST-NEXT:    stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    li r4, 112
+; FAST-NEXT:    xxswapd vs0, vs5
+; FAST-NEXT:    stxvd2x vs0, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    xxswapd vs0, vs0
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    addi r3, r1, 1072
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    xxswapd vs0, vs0
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    addi r3, r1, 1056
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    xxswapd vs0, vs0
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    addi r3, r1, 1040
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    xxswapd vs0, vs0
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    addi r3, r1, 1024
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    addi r3, r1, 1008
+; FAST-NEXT:    xxswapd v22, vs0
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    addi r3, r1, 992
+; FAST-NEXT:    xxswapd v21, vs0
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    addi r3, r1, 976
+; FAST-NEXT:    xxswapd v20, vs0
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    addi r3, r1, 960
+; FAST-NEXT:    xxswapd v31, vs0
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    addi r3, r1, 944
+; FAST-NEXT:    xxswapd v30, vs0
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    addi r3, r1, 928
+; FAST-NEXT:    xxswapd v29, vs0
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    addi r3, r1, 912
+; FAST-NEXT:    xxswapd v28, vs0
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    addi r3, r1, 896
+; FAST-NEXT:    xxswapd v27, vs0
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    addi r3, r1, 880
+; FAST-NEXT:    xxswapd v26, vs0
+; FAST-NEXT:    lxvd2x vs0, 0, r3
+; FAST-NEXT:    xxswapd v25, vs0
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v24
+; FAST-NEXT:    mtvsrd v23, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 368
+; FAST-NEXT:    vmr v2, v25
+; FAST-NEXT:    xxmrghd vs0, vs0, v23
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v26
+; FAST-NEXT:    mtvsrd v25, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 320
+; FAST-NEXT:    vmr v2, v27
+; FAST-NEXT:    xxmrghd vs0, vs0, v25
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v28
+; FAST-NEXT:    mtvsrd v27, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 272
+; FAST-NEXT:    vmr v2, v29
+; FAST-NEXT:    xxmrghd vs0, vs0, v27
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v30
+; FAST-NEXT:    mtvsrd v29, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 208
+; FAST-NEXT:    vmr v2, v31
+; FAST-NEXT:    xxmrghd vs0, vs0, v29
+; FAST-NEXT:    stxvd2x vs0, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v20
+; FAST-NEXT:    mtvsrd v31, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    vmr v2, v21
+; FAST-NEXT:    xxmrghd v31, vs0, v31
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    vmr v2, v22
+; FAST-NEXT:    mtvsrd v29, r3
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    xxmrghd v29, vs0, v29
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtvsrd v27, r3
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    xxmrghd v27, vs0, v27
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtvsrd v25, r3
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 112
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    xxmrghd v25, vs0, v25
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtvsrd v23, r3
+; FAST-NEXT:    li r3, 128
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 144
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    xxmrghd v23, vs0, v23
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtvsrd v22, r3
+; FAST-NEXT:    li r3, 160
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 176
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    xxmrghd v22, vs0, v22
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtvsrd v21, r3
+; FAST-NEXT:    li r3, 192
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 224
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    xxmrghd v21, vs0, v21
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtvsrd v20, r3
+; FAST-NEXT:    li r3, 240
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 256
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    xxmrghd v20, vs0, v20
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtvsrd v24, r3
+; FAST-NEXT:    li r3, 288
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 304
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    xxmrghd v24, vs0, v24
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtvsrd v26, r3
+; FAST-NEXT:    li r3, 336
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 352
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    xxmrghd v26, vs0, v26
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtvsrd v28, r3
+; FAST-NEXT:    li r3, 384
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 400
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    xxmrghd v28, vs0, v28
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtvsrd v30, r3
+; FAST-NEXT:    li r3, 416
+; FAST-NEXT:    lxvd2x v2, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    bl lrintf128
+; FAST-NEXT:    nop
+; FAST-NEXT:    mtfprd f0, r3
+; FAST-NEXT:    li r3, 80
+; FAST-NEXT:    xxswapd vs1, v28
+; FAST-NEXT:    li r4, 208
+; FAST-NEXT:    xxswapd vs2, v26
+; FAST-NEXT:    xxswapd vs3, v27
+; FAST-NEXT:    xxmrghd v2, vs0, v30
+; FAST-NEXT:    xxswapd vs0, v2
+; FAST-NEXT:    stxvd2x vs0, r30, r3
+; FAST-NEXT:    li r3, 64
+; FAST-NEXT:    stxvd2x vs1, r30, r3
+; FAST-NEXT:    li r3, 48
+; FAST-NEXT:    stxvd2x vs2, r30, r3
+; FAST-NEXT:    li r3, 32
+; FAST-NEXT:    xxswapd vs0, v24
+; FAST-NEXT:    stxvd2x vs0, r30, r3
+; FAST-NEXT:    li r3, 16
+; FAST-NEXT:    xxswapd vs1, v20
+; FAST-NEXT:    stxvd2x vs1, r30, r3
+; FAST-NEXT:    li r3, 240
+; FAST-NEXT:    xxswapd vs2, v23
+; FAST-NEXT:    xxswapd vs0, v21
+; FAST-NEXT:    stxvd2x vs0, r30, r3
+; FAST-NEXT:    li r3, 224
+; FAST-NEXT:    xxswapd vs1, v22
+; FAST-NEXT:    stxvd2x vs1, r30, r3
+; FAST-NEXT:    li r3, 208
+; FAST-NEXT:    stxvd2x vs2, r30, r3
+; FAST-NEXT:    li r3, 192
+; FAST-NEXT:    xxswapd vs0, v25
+; FAST-NEXT:    stxvd2x vs0, r30, r3
+; FAST-NEXT:    li r3, 176
+; FAST-NEXT:    stxvd2x vs3, r30, r3
+; FAST-NEXT:    li r3, 160
+; FAST-NEXT:    lxvd2x vs2, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT:    li r4, 272
+; FAST-NEXT:    xxswapd vs1, v29
+; FAST-NEXT:    stxvd2x vs1, r30, r3
+; FAST-NEXT:    li r3, 144
+; FAST-NEXT:    lxvd2x vs1, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT:    li r4, 320
+; FAST-NEXT:    lxvd2x vs3, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT:    li r4, 368
+; FAST-NEXT:    lxvd2x vs4, r1, r4 # 16-byte Folded Reload
+; FAST-NEXT:    xxswapd vs0, v31
+; FAST-NEXT:    stxvd2x vs0, r30, r3
+; FAST-NEXT:    li r3, 128
+; FAST-NEXT:    xxswapd vs2, vs2
+; FAST-NEXT:    stxvd2x vs2, r30, r3
+; FAST-NEXT:    li r3, 112
+; FAST-NEXT:    xxswapd vs1, vs1
+; FAST-NEXT:    stxvd2x vs1, r30, r3
+; FAST-NEXT:    li r3, 96
+; FAST-NEXT:    xxswapd vs3, vs3
+; FAST-NEXT:    stxvd2x vs3, r30, r3
+; FAST-NEXT:    li r3, 608
+; FAST-NEXT:    xxswapd vs4, vs4
+; FAST-NEXT:    stxvd2x vs4, 0, r30
+; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 592
+; FAST-NEXT:    ld r30, 624(r1) # 8-byte Folded Reload
+; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 576
+; FAST-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 560
+; FAST-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 544
+; FAST-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 528
+; FAST-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 512
+; FAST-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 496
+; FAST-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 480
+; FAST-NEXT:    lvx v23, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 464
+; FAST-NEXT:    lvx v22, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 448
+; FAST-NEXT:    lvx v21, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    li r3, 432
+; FAST-NEXT:    lvx v20, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    addi r1, r1, 640
+; FAST-NEXT:    ld r0, 16(r1)
+; FAST-NEXT:    mtlr r0
+; FAST-NEXT:    blr
+  %a = call <32 x i64> @llvm.lrint.v32i64.v32f128(<32 x fp128> %x)
+  ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.lrint.v32i64.v32f128(<32 x fp128>)
diff --git a/llvm/test/CodeGen/X86/vector-llrint.ll b/llvm/test/CodeGen/X86/vector-llrint.ll
index 08ee748497650..f393ffd8a0441 100644
--- a/llvm/test/CodeGen/X86/vector-llrint.ll
+++ b/llvm/test/CodeGen/X86/vector-llrint.ll
@@ -1246,3 +1246,708 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
   ret <8 x i64> %a
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
+
+define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
+; X86-LABEL: llrint_v1i64_v1f128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    andl $-16, %esp
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    pushl 20(%ebp)
+; X86-NEXT:    pushl 16(%ebp)
+; X86-NEXT:    pushl 12(%ebp)
+; X86-NEXT:    pushl 8(%ebp)
+; X86-NEXT:    calll llrintl
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl %ebp, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl
+;
+; SSE-LABEL: llrint_v1i64_v1f128:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pushq %rax
+; SSE-NEXT:    .cfi_def_cfa_offset 16
+; SSE-NEXT:    callq llrintl at PLT
+; SSE-NEXT:    popq %rcx
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: llrint_v1i64_v1f128:
+; AVX:       # %bb.0:
+; AVX-NEXT:    pushq %rax
+; AVX-NEXT:    .cfi_def_cfa_offset 16
+; AVX-NEXT:    callq llrintl at PLT
+; AVX-NEXT:    popq %rcx
+; AVX-NEXT:    .cfi_def_cfa_offset 8
+; AVX-NEXT:    retq
+;
+; AVX512DQ-LABEL: llrint_v1i64_v1f128:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    pushq %rax
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
+; AVX512DQ-NEXT:    callq llrintl at PLT
+; AVX512DQ-NEXT:    popq %rcx
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 8
+; AVX512DQ-NEXT:    retq
+  %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x)
+  ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>)
+
+define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
+; X86-LABEL: llrint_v2i64_v2f128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-16, %esp
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    .cfi_offset %esi, -20
+; X86-NEXT:    .cfi_offset %edi, -16
+; X86-NEXT:    .cfi_offset %ebx, -12
+; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    pushl 24(%ebp)
+; X86-NEXT:    pushl 20(%ebp)
+; X86-NEXT:    pushl 16(%ebp)
+; X86-NEXT:    pushl 12(%ebp)
+; X86-NEXT:    calll llrintl
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    pushl 40(%ebp)
+; X86-NEXT:    pushl 36(%ebp)
+; X86-NEXT:    pushl 32(%ebp)
+; X86-NEXT:    pushl 28(%ebp)
+; X86-NEXT:    calll llrintl
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl %edx, 12(%esi)
+; X86-NEXT:    movl %eax, 8(%esi)
+; X86-NEXT:    movl %ebx, 4(%esi)
+; X86-NEXT:    movl %edi, (%esi)
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl $4
+;
+; SSE-LABEL: llrint_v2i64_v2f128:
+; SSE:       # %bb.0:
+; SSE-NEXT:    subq $40, %rsp
+; SSE-NEXT:    .cfi_def_cfa_offset 48
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    callq llrintl at PLT
+; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    callq llrintl at PLT
+; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT:    # xmm0 = xmm0[0],mem[0]
+; SSE-NEXT:    addq $40, %rsp
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: llrint_v2i64_v2f128:
+; AVX:       # %bb.0:
+; AVX-NEXT:    subq $40, %rsp
+; AVX-NEXT:    .cfi_def_cfa_offset 48
+; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX-NEXT:    vmovaps %xmm1, %xmm0
+; AVX-NEXT:    callq llrintl at PLT
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX-NEXT:    callq llrintl at PLT
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX-NEXT:    addq $40, %rsp
+; AVX-NEXT:    .cfi_def_cfa_offset 8
+; AVX-NEXT:    retq
+;
+; AVX512DQ-LABEL: llrint_v2i64_v2f128:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    subq $40, %rsp
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 48
+; AVX512DQ-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512DQ-NEXT:    callq llrintl at PLT
+; AVX512DQ-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT:    callq llrintl at PLT
+; AVX512DQ-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-NEXT:    vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-NEXT:    addq $40, %rsp
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 8
+; AVX512DQ-NEXT:    retq
+  %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x)
+  ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>)
+
+define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
+; X86-LABEL: llrint_v4i64_v4f128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-16, %esp
+; X86-NEXT:    subl $32, %esp
+; X86-NEXT:    .cfi_offset %esi, -20
+; X86-NEXT:    .cfi_offset %edi, -16
+; X86-NEXT:    .cfi_offset %ebx, -12
+; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    movl 36(%ebp), %edi
+; X86-NEXT:    movl 40(%ebp), %ebx
+; X86-NEXT:    pushl 24(%ebp)
+; X86-NEXT:    pushl 20(%ebp)
+; X86-NEXT:    pushl 16(%ebp)
+; X86-NEXT:    pushl 12(%ebp)
+; X86-NEXT:    calll llrintl
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl 32(%ebp)
+; X86-NEXT:    pushl 28(%ebp)
+; X86-NEXT:    calll llrintl
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    pushl 56(%ebp)
+; X86-NEXT:    pushl 52(%ebp)
+; X86-NEXT:    pushl 48(%ebp)
+; X86-NEXT:    pushl 44(%ebp)
+; X86-NEXT:    calll llrintl
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    pushl 72(%ebp)
+; X86-NEXT:    pushl 68(%ebp)
+; X86-NEXT:    pushl 64(%ebp)
+; X86-NEXT:    pushl 60(%ebp)
+; X86-NEXT:    calll llrintl
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl %edx, 28(%esi)
+; X86-NEXT:    movl %eax, 24(%esi)
+; X86-NEXT:    movl %ebx, 20(%esi)
+; X86-NEXT:    movl %edi, 16(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 12(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 8(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 4(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, (%esi)
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl $4
+;
+; SSE-LABEL: llrint_v4i64_v4f128:
+; SSE:       # %bb.0:
+; SSE-NEXT:    subq $72, %rsp
+; SSE-NEXT:    .cfi_def_cfa_offset 80
+; SSE-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    callq llrintl at PLT
+; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; SSE-NEXT:    callq llrintl at PLT
+; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT:    # xmm0 = xmm0[0],mem[0]
+; SSE-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    callq llrintl at PLT
+; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    callq llrintl at PLT
+; SSE-NEXT:    movq %rax, %xmm1
+; SSE-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; SSE-NEXT:    # xmm1 = xmm1[0],mem[0]
+; SSE-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; SSE-NEXT:    addq $72, %rsp
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: llrint_v4i64_v4f128:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    subq $72, %rsp
+; AVX1-NEXT:    .cfi_def_cfa_offset 80
+; AVX1-NEXT:    vmovaps %xmm2, (%rsp) # 16-byte Spill
+; AVX1-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT:    vmovaps %xmm3, %xmm0
+; AVX1-NEXT:    callq llrintl at PLT
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX1-NEXT:    callq llrintl at PLT
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX1-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX1-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT:    callq llrintl at PLT
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT:    callq llrintl at PLT
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX1-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX1-NEXT:    addq $72, %rsp
+; AVX1-NEXT:    .cfi_def_cfa_offset 8
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: llrint_v4i64_v4f128:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    subq $72, %rsp
+; AVX512-NEXT:    .cfi_def_cfa_offset 80
+; AVX512-NEXT:    vmovaps %xmm2, (%rsp) # 16-byte Spill
+; AVX512-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps %xmm3, %xmm0
+; AVX512-NEXT:    callq llrintl at PLT
+; AVX512-NEXT:    vmovq %rax, %xmm0
+; AVX512-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512-NEXT:    callq llrintl at PLT
+; AVX512-NEXT:    vmovq %rax, %xmm0
+; AVX512-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT:    callq llrintl at PLT
+; AVX512-NEXT:    vmovq %rax, %xmm0
+; AVX512-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT:    callq llrintl at PLT
+; AVX512-NEXT:    vmovq %rax, %xmm0
+; AVX512-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512-NEXT:    vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-NEXT:    addq $72, %rsp
+; AVX512-NEXT:    .cfi_def_cfa_offset 8
+; AVX512-NEXT:    retq
+;
+; AVX512DQ-LABEL: llrint_v4i64_v4f128:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    subq $72, %rsp
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 80
+; AVX512DQ-NEXT:    vmovaps %xmm2, (%rsp) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps %xmm3, %xmm0
+; AVX512DQ-NEXT:    callq llrintl at PLT
+; AVX512DQ-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT:    callq llrintl at PLT
+; AVX512DQ-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT:    callq llrintl at PLT
+; AVX512DQ-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT:    callq llrintl at PLT
+; AVX512DQ-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-NEXT:    vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT:    addq $72, %rsp
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 8
+; AVX512DQ-NEXT:    retq
+  %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x)
+  ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>)
+
+define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
+; X86-LABEL: llrint_v8i64_v8f128:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-16, %esp
+; X86-NEXT:    subl $64, %esp
+; X86-NEXT:    .cfi_offset %esi, -20
+; X86-NEXT:    .cfi_offset %edi, -16
+; X86-NEXT:    .cfi_offset %ebx, -12
+; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    movl 36(%ebp), %edi
+; X86-NEXT:    movl 40(%ebp), %ebx
+; X86-NEXT:    pushl 24(%ebp)
+; X86-NEXT:    pushl 20(%ebp)
+; X86-NEXT:    pushl 16(%ebp)
+; X86-NEXT:    pushl 12(%ebp)
+; X86-NEXT:    calll llrintl
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl 32(%ebp)
+; X86-NEXT:    pushl 28(%ebp)
+; X86-NEXT:    calll llrintl
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    pushl 56(%ebp)
+; X86-NEXT:    pushl 52(%ebp)
+; X86-NEXT:    pushl 48(%ebp)
+; X86-NEXT:    pushl 44(%ebp)
+; X86-NEXT:    calll llrintl
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    pushl 72(%ebp)
+; X86-NEXT:    pushl 68(%ebp)
+; X86-NEXT:    pushl 64(%ebp)
+; X86-NEXT:    pushl 60(%ebp)
+; X86-NEXT:    calll llrintl
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    pushl 88(%ebp)
+; X86-NEXT:    pushl 84(%ebp)
+; X86-NEXT:    pushl 80(%ebp)
+; X86-NEXT:    pushl 76(%ebp)
+; X86-NEXT:    calll llrintl
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    pushl 104(%ebp)
+; X86-NEXT:    pushl 100(%ebp)
+; X86-NEXT:    pushl 96(%ebp)
+; X86-NEXT:    pushl 92(%ebp)
+; X86-NEXT:    calll llrintl
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    pushl 120(%ebp)
+; X86-NEXT:    pushl 116(%ebp)
+; X86-NEXT:    pushl 112(%ebp)
+; X86-NEXT:    pushl 108(%ebp)
+; X86-NEXT:    calll llrintl
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    pushl 136(%ebp)
+; X86-NEXT:    pushl 132(%ebp)
+; X86-NEXT:    pushl 128(%ebp)
+; X86-NEXT:    pushl 124(%ebp)
+; X86-NEXT:    calll llrintl
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl %edx, 60(%esi)
+; X86-NEXT:    movl %eax, 56(%esi)
+; X86-NEXT:    movl %ebx, 52(%esi)
+; X86-NEXT:    movl %edi, 48(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 44(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 40(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 36(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 32(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 28(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 24(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 20(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 16(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 12(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 8(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 4(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, (%esi)
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl $4
+;
+; SSE-LABEL: llrint_v8i64_v8f128:
+; SSE:       # %bb.0:
+; SSE-NEXT:    subq $136, %rsp
+; SSE-NEXT:    .cfi_def_cfa_offset 144
+; SSE-NEXT:    movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps %xmm3, (%rsp) # 16-byte Spill
+; SSE-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    callq llrintl at PLT
+; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    callq llrintl at PLT
+; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT:    # xmm0 = xmm0[0],mem[0]
+; SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; SSE-NEXT:    callq llrintl at PLT
+; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    callq llrintl at PLT
+; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT:    # xmm0 = xmm0[0],mem[0]
+; SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    callq llrintl at PLT
+; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    callq llrintl at PLT
+; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; SSE-NEXT:    # xmm0 = xmm0[0],mem[0]
+; SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    callq llrintl at PLT
+; SSE-NEXT:    movq %rax, %xmm0
+; SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    callq llrintl at PLT
+; SSE-NEXT:    movq %rax, %xmm3
+; SSE-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload
+; SSE-NEXT:    # xmm3 = xmm3[0],mem[0]
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
+; SSE-NEXT:    addq $136, %rsp
+; SSE-NEXT:    .cfi_def_cfa_offset 8
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: llrint_v8i64_v8f128:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    subq $152, %rsp
+; AVX1-NEXT:    .cfi_def_cfa_offset 160
+; AVX1-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT:    vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT:    vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT:    vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT:    vmovaps %xmm2, (%rsp) # 16-byte Spill
+; AVX1-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT:    vmovaps %xmm3, %xmm0
+; AVX1-NEXT:    callq llrintl at PLT
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX1-NEXT:    callq llrintl at PLT
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX1-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX1-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT:    callq llrintl at PLT
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT:    callq llrintl at PLT
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX1-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX1-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX1-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    callq llrintl at PLT
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT:    callq llrintl at PLT
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX1-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT:    callq llrintl at PLT
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX1-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX1-NEXT:    callq llrintl at PLT
+; AVX1-NEXT:    vmovq %rax, %xmm0
+; AVX1-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX1-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm1 # 16-byte Folded Reload
+; AVX1-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX1-NEXT:    addq $152, %rsp
+; AVX1-NEXT:    .cfi_def_cfa_offset 8
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: llrint_v8i64_v8f128:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    subq $152, %rsp
+; AVX512-NEXT:    .cfi_def_cfa_offset 160
+; AVX512-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
+; AVX512-NEXT:    vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps %xmm7, %xmm0
+; AVX512-NEXT:    callq llrintl at PLT
+; AVX512-NEXT:    vmovq %rax, %xmm0
+; AVX512-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512-NEXT:    callq llrintl at PLT
+; AVX512-NEXT:    vmovq %rax, %xmm0
+; AVX512-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT:    callq llrintl at PLT
+; AVX512-NEXT:    vmovq %rax, %xmm0
+; AVX512-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT:    callq llrintl at PLT
+; AVX512-NEXT:    vmovq %rax, %xmm0
+; AVX512-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512-NEXT:    vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-NEXT:    vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    callq llrintl at PLT
+; AVX512-NEXT:    vmovq %rax, %xmm0
+; AVX512-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT:    callq llrintl at PLT
+; AVX512-NEXT:    vmovq %rax, %xmm0
+; AVX512-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT:    callq llrintl at PLT
+; AVX512-NEXT:    vmovq %rax, %xmm0
+; AVX512-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-NEXT:    callq llrintl at PLT
+; AVX512-NEXT:    vmovq %rax, %xmm0
+; AVX512-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512-NEXT:    vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-NEXT:    vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; AVX512-NEXT:    addq $152, %rsp
+; AVX512-NEXT:    .cfi_def_cfa_offset 8
+; AVX512-NEXT:    retq
+;
+; AVX512DQ-LABEL: llrint_v8i64_v8f128:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    subq $152, %rsp
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 160
+; AVX512DQ-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps %xmm7, %xmm0
+; AVX512DQ-NEXT:    callq llrintl at PLT
+; AVX512DQ-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT:    callq llrintl at PLT
+; AVX512DQ-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT:    callq llrintl at PLT
+; AVX512DQ-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT:    callq llrintl at PLT
+; AVX512DQ-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-NEXT:    vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT:    vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512DQ-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    callq llrintl at PLT
+; AVX512DQ-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT:    callq llrintl at PLT
+; AVX512DQ-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT:    callq llrintl at PLT
+; AVX512DQ-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-NEXT:    callq llrintl at PLT
+; AVX512DQ-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-NEXT:    vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512DQ-NEXT:    vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; AVX512DQ-NEXT:    addq $152, %rsp
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 8
+; AVX512DQ-NEXT:    retq
+  %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x)
+  ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>)
diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll
index a4c50e539d661..8900e94c50305 100644
--- a/llvm/test/CodeGen/X86/vector-lrint.ll
+++ b/llvm/test/CodeGen/X86/vector-lrint.ll
@@ -1140,3 +1140,1274 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
   ret <8 x iXLen> %a
 }
 declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
+
+define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
+; X86-I32-LABEL: lrint_v1fp128:
+; X86-I32:       # %bb.0:
+; X86-I32-NEXT:    pushl %ebp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 8
+; X86-I32-NEXT:    .cfi_offset %ebp, -8
+; X86-I32-NEXT:    movl %esp, %ebp
+; X86-I32-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I32-NEXT:    andl $-16, %esp
+; X86-I32-NEXT:    subl $16, %esp
+; X86-I32-NEXT:    pushl 20(%ebp)
+; X86-I32-NEXT:    pushl 16(%ebp)
+; X86-I32-NEXT:    pushl 12(%ebp)
+; X86-I32-NEXT:    pushl 8(%ebp)
+; X86-I32-NEXT:    calll lrintl
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    movl %ebp, %esp
+; X86-I32-NEXT:    popl %ebp
+; X86-I32-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I32-NEXT:    retl
+;
+; X86-I64-LABEL: lrint_v1fp128:
+; X86-I64:       # %bb.0:
+; X86-I64-NEXT:    pushl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_offset 8
+; X86-I64-NEXT:    .cfi_offset %ebp, -8
+; X86-I64-NEXT:    movl %esp, %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I64-NEXT:    andl $-16, %esp
+; X86-I64-NEXT:    subl $16, %esp
+; X86-I64-NEXT:    pushl 20(%ebp)
+; X86-I64-NEXT:    pushl 16(%ebp)
+; X86-I64-NEXT:    pushl 12(%ebp)
+; X86-I64-NEXT:    pushl 8(%ebp)
+; X86-I64-NEXT:    calll lrintl
+; X86-I64-NEXT:    addl $16, %esp
+; X86-I64-NEXT:    movl %ebp, %esp
+; X86-I64-NEXT:    popl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I64-NEXT:    retl
+;
+; X86-SSE2-LABEL: lrint_v1fp128:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pushl %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
+; X86-SSE2-NEXT:    movl %esp, %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT:    andl $-16, %esp
+; X86-SSE2-NEXT:    subl $16, %esp
+; X86-SSE2-NEXT:    pushl 20(%ebp)
+; X86-SSE2-NEXT:    pushl 16(%ebp)
+; X86-SSE2-NEXT:    pushl 12(%ebp)
+; X86-SSE2-NEXT:    pushl 8(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movl %ebp, %esp
+; X86-SSE2-NEXT:    popl %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: lrint_v1fp128:
+; X86-AVX:       # %bb.0:
+; X86-AVX-NEXT:    pushl %ebp
+; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
+; X86-AVX-NEXT:    .cfi_offset %ebp, -8
+; X86-AVX-NEXT:    movl %esp, %ebp
+; X86-AVX-NEXT:    .cfi_def_cfa_register %ebp
+; X86-AVX-NEXT:    andl $-16, %esp
+; X86-AVX-NEXT:    subl $32, %esp
+; X86-AVX-NEXT:    vmovups 8(%ebp), %xmm0
+; X86-AVX-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX-NEXT:    calll lrintl
+; X86-AVX-NEXT:    movl %ebp, %esp
+; X86-AVX-NEXT:    popl %ebp
+; X86-AVX-NEXT:    .cfi_def_cfa %esp, 4
+; X86-AVX-NEXT:    retl
+;
+; X64-AVX-i32-LABEL: lrint_v1fp128:
+; X64-AVX-i32:       # %bb.0:
+; X64-AVX-i32-NEXT:    pushq %rax
+; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX-i32-NEXT:    callq lrintl at PLT
+; X64-AVX-i32-NEXT:    popq %rcx
+; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 8
+; X64-AVX-i32-NEXT:    retq
+;
+; X64-AVX-i64-LABEL: lrint_v1fp128:
+; X64-AVX-i64:       # %bb.0:
+; X64-AVX-i64-NEXT:    pushq %rax
+; X64-AVX-i64-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX-i64-NEXT:    callq lrintl at PLT
+; X64-AVX-i64-NEXT:    popq %rcx
+; X64-AVX-i64-NEXT:    .cfi_def_cfa_offset 8
+; X64-AVX-i64-NEXT:    retq
+  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x)
+  ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>)
+
+define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
+; X86-I32-LABEL: lrint_v2fp128:
+; X86-I32:       # %bb.0:
+; X86-I32-NEXT:    pushl %ebp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 8
+; X86-I32-NEXT:    .cfi_offset %ebp, -8
+; X86-I32-NEXT:    movl %esp, %ebp
+; X86-I32-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I32-NEXT:    pushl %ebx
+; X86-I32-NEXT:    pushl %edi
+; X86-I32-NEXT:    pushl %esi
+; X86-I32-NEXT:    andl $-16, %esp
+; X86-I32-NEXT:    subl $16, %esp
+; X86-I32-NEXT:    .cfi_offset %esi, -20
+; X86-I32-NEXT:    .cfi_offset %edi, -16
+; X86-I32-NEXT:    .cfi_offset %ebx, -12
+; X86-I32-NEXT:    movl 32(%ebp), %edi
+; X86-I32-NEXT:    movl 36(%ebp), %ebx
+; X86-I32-NEXT:    pushl 20(%ebp)
+; X86-I32-NEXT:    pushl 16(%ebp)
+; X86-I32-NEXT:    pushl 12(%ebp)
+; X86-I32-NEXT:    pushl 8(%ebp)
+; X86-I32-NEXT:    calll lrintl
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    movl %eax, %esi
+; X86-I32-NEXT:    pushl %ebx
+; X86-I32-NEXT:    pushl %edi
+; X86-I32-NEXT:    pushl 28(%ebp)
+; X86-I32-NEXT:    pushl 24(%ebp)
+; X86-I32-NEXT:    calll lrintl
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    movl %eax, %edx
+; X86-I32-NEXT:    movl %esi, %eax
+; X86-I32-NEXT:    leal -12(%ebp), %esp
+; X86-I32-NEXT:    popl %esi
+; X86-I32-NEXT:    popl %edi
+; X86-I32-NEXT:    popl %ebx
+; X86-I32-NEXT:    popl %ebp
+; X86-I32-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I32-NEXT:    retl
+;
+; X86-I64-LABEL: lrint_v2fp128:
+; X86-I64:       # %bb.0:
+; X86-I64-NEXT:    pushl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_offset 8
+; X86-I64-NEXT:    .cfi_offset %ebp, -8
+; X86-I64-NEXT:    movl %esp, %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I64-NEXT:    pushl %ebx
+; X86-I64-NEXT:    pushl %edi
+; X86-I64-NEXT:    pushl %esi
+; X86-I64-NEXT:    andl $-16, %esp
+; X86-I64-NEXT:    subl $16, %esp
+; X86-I64-NEXT:    .cfi_offset %esi, -20
+; X86-I64-NEXT:    .cfi_offset %edi, -16
+; X86-I64-NEXT:    .cfi_offset %ebx, -12
+; X86-I64-NEXT:    movl 8(%ebp), %esi
+; X86-I64-NEXT:    pushl 24(%ebp)
+; X86-I64-NEXT:    pushl 20(%ebp)
+; X86-I64-NEXT:    pushl 16(%ebp)
+; X86-I64-NEXT:    pushl 12(%ebp)
+; X86-I64-NEXT:    calll lrintl
+; X86-I64-NEXT:    addl $16, %esp
+; X86-I64-NEXT:    movl %eax, %edi
+; X86-I64-NEXT:    movl %edx, %ebx
+; X86-I64-NEXT:    pushl 40(%ebp)
+; X86-I64-NEXT:    pushl 36(%ebp)
+; X86-I64-NEXT:    pushl 32(%ebp)
+; X86-I64-NEXT:    pushl 28(%ebp)
+; X86-I64-NEXT:    calll lrintl
+; X86-I64-NEXT:    addl $16, %esp
+; X86-I64-NEXT:    movl %edx, 12(%esi)
+; X86-I64-NEXT:    movl %eax, 8(%esi)
+; X86-I64-NEXT:    movl %ebx, 4(%esi)
+; X86-I64-NEXT:    movl %edi, (%esi)
+; X86-I64-NEXT:    movl %esi, %eax
+; X86-I64-NEXT:    leal -12(%ebp), %esp
+; X86-I64-NEXT:    popl %esi
+; X86-I64-NEXT:    popl %edi
+; X86-I64-NEXT:    popl %ebx
+; X86-I64-NEXT:    popl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I64-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: lrint_v2fp128:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pushl %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
+; X86-SSE2-NEXT:    movl %esp, %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT:    pushl %ebx
+; X86-SSE2-NEXT:    pushl %edi
+; X86-SSE2-NEXT:    pushl %esi
+; X86-SSE2-NEXT:    andl $-16, %esp
+; X86-SSE2-NEXT:    subl $32, %esp
+; X86-SSE2-NEXT:    .cfi_offset %esi, -20
+; X86-SSE2-NEXT:    .cfi_offset %edi, -16
+; X86-SSE2-NEXT:    .cfi_offset %ebx, -12
+; X86-SSE2-NEXT:    movl 12(%ebp), %edi
+; X86-SSE2-NEXT:    movl 16(%ebp), %ebx
+; X86-SSE2-NEXT:    movl 20(%ebp), %esi
+; X86-SSE2-NEXT:    pushl 36(%ebp)
+; X86-SSE2-NEXT:    pushl 32(%ebp)
+; X86-SSE2-NEXT:    pushl 28(%ebp)
+; X86-SSE2-NEXT:    pushl 24(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movd %eax, %xmm0
+; X86-SSE2-NEXT:    movdqa %xmm0, (%esp) # 16-byte Spill
+; X86-SSE2-NEXT:    pushl %esi
+; X86-SSE2-NEXT:    pushl %ebx
+; X86-SSE2-NEXT:    pushl %edi
+; X86-SSE2-NEXT:    pushl 8(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movd %eax, %xmm0
+; X86-SSE2-NEXT:    punpckldq (%esp), %xmm0 # 16-byte Folded Reload
+; X86-SSE2-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X86-SSE2-NEXT:    leal -12(%ebp), %esp
+; X86-SSE2-NEXT:    popl %esi
+; X86-SSE2-NEXT:    popl %edi
+; X86-SSE2-NEXT:    popl %ebx
+; X86-SSE2-NEXT:    popl %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: lrint_v2fp128:
+; X86-AVX:       # %bb.0:
+; X86-AVX-NEXT:    pushl %ebp
+; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
+; X86-AVX-NEXT:    .cfi_offset %ebp, -8
+; X86-AVX-NEXT:    movl %esp, %ebp
+; X86-AVX-NEXT:    .cfi_def_cfa_register %ebp
+; X86-AVX-NEXT:    andl $-16, %esp
+; X86-AVX-NEXT:    subl $48, %esp
+; X86-AVX-NEXT:    vmovups 8(%ebp), %xmm0
+; X86-AVX-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX-NEXT:    calll lrintl
+; X86-AVX-NEXT:    vmovups 24(%ebp), %xmm0
+; X86-AVX-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX-NEXT:    vmovd %eax, %xmm0
+; X86-AVX-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX-NEXT:    calll lrintl
+; X86-AVX-NEXT:    vmovdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; X86-AVX-NEXT:    movl %ebp, %esp
+; X86-AVX-NEXT:    popl %ebp
+; X86-AVX-NEXT:    .cfi_def_cfa %esp, 4
+; X86-AVX-NEXT:    retl
+;
+; X64-AVX-i32-LABEL: lrint_v2fp128:
+; X64-AVX-i32:       # %bb.0:
+; X64-AVX-i32-NEXT:    pushq %rbx
+; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX-i32-NEXT:    subq $16, %rsp
+; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 32
+; X64-AVX-i32-NEXT:    .cfi_offset %rbx, -16
+; X64-AVX-i32-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX-i32-NEXT:    vmovaps %xmm1, %xmm0
+; X64-AVX-i32-NEXT:    callq lrintl at PLT
+; X64-AVX-i32-NEXT:    movl %eax, %ebx
+; X64-AVX-i32-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; X64-AVX-i32-NEXT:    callq lrintl at PLT
+; X64-AVX-i32-NEXT:    vmovd %eax, %xmm0
+; X64-AVX-i32-NEXT:    vpinsrd $1, %ebx, %xmm0, %xmm0
+; X64-AVX-i32-NEXT:    addq $16, %rsp
+; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX-i32-NEXT:    popq %rbx
+; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 8
+; X64-AVX-i32-NEXT:    retq
+;
+; X64-AVX-i64-LABEL: lrint_v2fp128:
+; X64-AVX-i64:       # %bb.0:
+; X64-AVX-i64-NEXT:    subq $40, %rsp
+; X64-AVX-i64-NEXT:    .cfi_def_cfa_offset 48
+; X64-AVX-i64-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX-i64-NEXT:    vmovaps %xmm1, %xmm0
+; X64-AVX-i64-NEXT:    callq lrintl at PLT
+; X64-AVX-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX-i64-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX-i64-NEXT:    callq lrintl at PLT
+; X64-AVX-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX-i64-NEXT:    vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; X64-AVX-i64-NEXT:    addq $40, %rsp
+; X64-AVX-i64-NEXT:    .cfi_def_cfa_offset 8
+; X64-AVX-i64-NEXT:    retq
+  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x)
+  ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>)
+
+define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
+; X86-I32-LABEL: lrint_v4fp128:
+; X86-I32:       # %bb.0:
+; X86-I32-NEXT:    pushl %ebp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 8
+; X86-I32-NEXT:    .cfi_offset %ebp, -8
+; X86-I32-NEXT:    movl %esp, %ebp
+; X86-I32-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I32-NEXT:    pushl %ebx
+; X86-I32-NEXT:    pushl %edi
+; X86-I32-NEXT:    pushl %esi
+; X86-I32-NEXT:    andl $-16, %esp
+; X86-I32-NEXT:    subl $16, %esp
+; X86-I32-NEXT:    .cfi_offset %esi, -20
+; X86-I32-NEXT:    .cfi_offset %edi, -16
+; X86-I32-NEXT:    .cfi_offset %ebx, -12
+; X86-I32-NEXT:    movl 8(%ebp), %esi
+; X86-I32-NEXT:    movl 36(%ebp), %ebx
+; X86-I32-NEXT:    movl 40(%ebp), %edi
+; X86-I32-NEXT:    pushl 24(%ebp)
+; X86-I32-NEXT:    pushl 20(%ebp)
+; X86-I32-NEXT:    pushl 16(%ebp)
+; X86-I32-NEXT:    pushl 12(%ebp)
+; X86-I32-NEXT:    calll lrintl
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT:    pushl %edi
+; X86-I32-NEXT:    pushl %ebx
+; X86-I32-NEXT:    pushl 32(%ebp)
+; X86-I32-NEXT:    pushl 28(%ebp)
+; X86-I32-NEXT:    calll lrintl
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    movl %eax, %ebx
+; X86-I32-NEXT:    pushl 56(%ebp)
+; X86-I32-NEXT:    pushl 52(%ebp)
+; X86-I32-NEXT:    pushl 48(%ebp)
+; X86-I32-NEXT:    pushl 44(%ebp)
+; X86-I32-NEXT:    calll lrintl
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    movl %eax, %edi
+; X86-I32-NEXT:    pushl 72(%ebp)
+; X86-I32-NEXT:    pushl 68(%ebp)
+; X86-I32-NEXT:    pushl 64(%ebp)
+; X86-I32-NEXT:    pushl 60(%ebp)
+; X86-I32-NEXT:    calll lrintl
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    movl %eax, 12(%esi)
+; X86-I32-NEXT:    movl %edi, 8(%esi)
+; X86-I32-NEXT:    movl %ebx, 4(%esi)
+; X86-I32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I32-NEXT:    movl %eax, (%esi)
+; X86-I32-NEXT:    movl %esi, %eax
+; X86-I32-NEXT:    leal -12(%ebp), %esp
+; X86-I32-NEXT:    popl %esi
+; X86-I32-NEXT:    popl %edi
+; X86-I32-NEXT:    popl %ebx
+; X86-I32-NEXT:    popl %ebp
+; X86-I32-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I32-NEXT:    retl $4
+;
+; X86-I64-LABEL: lrint_v4fp128:
+; X86-I64:       # %bb.0:
+; X86-I64-NEXT:    pushl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_offset 8
+; X86-I64-NEXT:    .cfi_offset %ebp, -8
+; X86-I64-NEXT:    movl %esp, %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I64-NEXT:    pushl %ebx
+; X86-I64-NEXT:    pushl %edi
+; X86-I64-NEXT:    pushl %esi
+; X86-I64-NEXT:    andl $-16, %esp
+; X86-I64-NEXT:    subl $32, %esp
+; X86-I64-NEXT:    .cfi_offset %esi, -20
+; X86-I64-NEXT:    .cfi_offset %edi, -16
+; X86-I64-NEXT:    .cfi_offset %ebx, -12
+; X86-I64-NEXT:    movl 8(%ebp), %esi
+; X86-I64-NEXT:    movl 36(%ebp), %edi
+; X86-I64-NEXT:    movl 40(%ebp), %ebx
+; X86-I64-NEXT:    pushl 24(%ebp)
+; X86-I64-NEXT:    pushl 20(%ebp)
+; X86-I64-NEXT:    pushl 16(%ebp)
+; X86-I64-NEXT:    pushl 12(%ebp)
+; X86-I64-NEXT:    calll lrintl
+; X86-I64-NEXT:    addl $16, %esp
+; X86-I64-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    pushl %ebx
+; X86-I64-NEXT:    pushl %edi
+; X86-I64-NEXT:    pushl 32(%ebp)
+; X86-I64-NEXT:    pushl 28(%ebp)
+; X86-I64-NEXT:    calll lrintl
+; X86-I64-NEXT:    addl $16, %esp
+; X86-I64-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    pushl 56(%ebp)
+; X86-I64-NEXT:    pushl 52(%ebp)
+; X86-I64-NEXT:    pushl 48(%ebp)
+; X86-I64-NEXT:    pushl 44(%ebp)
+; X86-I64-NEXT:    calll lrintl
+; X86-I64-NEXT:    addl $16, %esp
+; X86-I64-NEXT:    movl %eax, %edi
+; X86-I64-NEXT:    movl %edx, %ebx
+; X86-I64-NEXT:    pushl 72(%ebp)
+; X86-I64-NEXT:    pushl 68(%ebp)
+; X86-I64-NEXT:    pushl 64(%ebp)
+; X86-I64-NEXT:    pushl 60(%ebp)
+; X86-I64-NEXT:    calll lrintl
+; X86-I64-NEXT:    addl $16, %esp
+; X86-I64-NEXT:    movl %edx, 28(%esi)
+; X86-I64-NEXT:    movl %eax, 24(%esi)
+; X86-I64-NEXT:    movl %ebx, 20(%esi)
+; X86-I64-NEXT:    movl %edi, 16(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, 12(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, 8(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, 4(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, (%esi)
+; X86-I64-NEXT:    movl %esi, %eax
+; X86-I64-NEXT:    leal -12(%ebp), %esp
+; X86-I64-NEXT:    popl %esi
+; X86-I64-NEXT:    popl %edi
+; X86-I64-NEXT:    popl %ebx
+; X86-I64-NEXT:    popl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I64-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: lrint_v4fp128:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pushl %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
+; X86-SSE2-NEXT:    movl %esp, %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT:    pushl %ebx
+; X86-SSE2-NEXT:    pushl %edi
+; X86-SSE2-NEXT:    pushl %esi
+; X86-SSE2-NEXT:    andl $-16, %esp
+; X86-SSE2-NEXT:    subl $48, %esp
+; X86-SSE2-NEXT:    .cfi_offset %esi, -20
+; X86-SSE2-NEXT:    .cfi_offset %edi, -16
+; X86-SSE2-NEXT:    .cfi_offset %ebx, -12
+; X86-SSE2-NEXT:    movl 48(%ebp), %edi
+; X86-SSE2-NEXT:    movl 52(%ebp), %ebx
+; X86-SSE2-NEXT:    pushl 36(%ebp)
+; X86-SSE2-NEXT:    pushl 32(%ebp)
+; X86-SSE2-NEXT:    pushl 28(%ebp)
+; X86-SSE2-NEXT:    pushl 24(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movl %eax, %esi
+; X86-SSE2-NEXT:    pushl %ebx
+; X86-SSE2-NEXT:    pushl %edi
+; X86-SSE2-NEXT:    pushl 44(%ebp)
+; X86-SSE2-NEXT:    pushl 40(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movl %eax, %edi
+; X86-SSE2-NEXT:    pushl 68(%ebp)
+; X86-SSE2-NEXT:    pushl 64(%ebp)
+; X86-SSE2-NEXT:    pushl 60(%ebp)
+; X86-SSE2-NEXT:    pushl 56(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movd %eax, %xmm0
+; X86-SSE2-NEXT:    movd %edi, %xmm1
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-SSE2-NEXT:    movdqa %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-SSE2-NEXT:    movd %esi, %xmm0
+; X86-SSE2-NEXT:    movdqa %xmm0, (%esp) # 16-byte Spill
+; X86-SSE2-NEXT:    pushl 20(%ebp)
+; X86-SSE2-NEXT:    pushl 16(%ebp)
+; X86-SSE2-NEXT:    pushl 12(%ebp)
+; X86-SSE2-NEXT:    pushl 8(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movd %eax, %xmm0
+; X86-SSE2-NEXT:    punpckldq (%esp), %xmm0 # 16-byte Folded Reload
+; X86-SSE2-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; X86-SSE2-NEXT:    punpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; X86-SSE2-NEXT:    # xmm0 = xmm0[0],mem[0]
+; X86-SSE2-NEXT:    leal -12(%ebp), %esp
+; X86-SSE2-NEXT:    popl %esi
+; X86-SSE2-NEXT:    popl %edi
+; X86-SSE2-NEXT:    popl %ebx
+; X86-SSE2-NEXT:    popl %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: lrint_v4fp128:
+; X86-AVX:       # %bb.0:
+; X86-AVX-NEXT:    pushl %ebp
+; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
+; X86-AVX-NEXT:    .cfi_offset %ebp, -8
+; X86-AVX-NEXT:    movl %esp, %ebp
+; X86-AVX-NEXT:    .cfi_def_cfa_register %ebp
+; X86-AVX-NEXT:    pushl %edi
+; X86-AVX-NEXT:    pushl %esi
+; X86-AVX-NEXT:    andl $-16, %esp
+; X86-AVX-NEXT:    subl $32, %esp
+; X86-AVX-NEXT:    .cfi_offset %esi, -16
+; X86-AVX-NEXT:    .cfi_offset %edi, -12
+; X86-AVX-NEXT:    vmovups 40(%ebp), %xmm0
+; X86-AVX-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX-NEXT:    calll lrintl
+; X86-AVX-NEXT:    movl %eax, %esi
+; X86-AVX-NEXT:    vmovups 24(%ebp), %xmm0
+; X86-AVX-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX-NEXT:    calll lrintl
+; X86-AVX-NEXT:    movl %eax, %edi
+; X86-AVX-NEXT:    vmovups 8(%ebp), %xmm0
+; X86-AVX-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX-NEXT:    calll lrintl
+; X86-AVX-NEXT:    vmovups 56(%ebp), %xmm0
+; X86-AVX-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX-NEXT:    vmovd %eax, %xmm0
+; X86-AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpinsrd $2, %esi, %xmm0, %xmm0
+; X86-AVX-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX-NEXT:    calll lrintl
+; X86-AVX-NEXT:    vmovdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
+; X86-AVX-NEXT:    leal -8(%ebp), %esp
+; X86-AVX-NEXT:    popl %esi
+; X86-AVX-NEXT:    popl %edi
+; X86-AVX-NEXT:    popl %ebp
+; X86-AVX-NEXT:    .cfi_def_cfa %esp, 4
+; X86-AVX-NEXT:    retl
+;
+; X64-AVX-i32-LABEL: lrint_v4fp128:
+; X64-AVX-i32:       # %bb.0:
+; X64-AVX-i32-NEXT:    pushq %rbx
+; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX-i32-NEXT:    subq $48, %rsp
+; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 64
+; X64-AVX-i32-NEXT:    .cfi_offset %rbx, -16
+; X64-AVX-i32-NEXT:    vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX-i32-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX-i32-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX-i32-NEXT:    vmovaps %xmm1, %xmm0
+; X64-AVX-i32-NEXT:    callq lrintl at PLT
+; X64-AVX-i32-NEXT:    movl %eax, %ebx
+; X64-AVX-i32-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; X64-AVX-i32-NEXT:    callq lrintl at PLT
+; X64-AVX-i32-NEXT:    vmovd %eax, %xmm0
+; X64-AVX-i32-NEXT:    vpinsrd $1, %ebx, %xmm0, %xmm0
+; X64-AVX-i32-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX-i32-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX-i32-NEXT:    callq lrintl at PLT
+; X64-AVX-i32-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; X64-AVX-i32-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
+; X64-AVX-i32-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX-i32-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX-i32-NEXT:    callq lrintl at PLT
+; X64-AVX-i32-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX-i32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
+; X64-AVX-i32-NEXT:    addq $48, %rsp
+; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX-i32-NEXT:    popq %rbx
+; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 8
+; X64-AVX-i32-NEXT:    retq
+;
+; X64-AVX1-i64-LABEL: lrint_v4fp128:
+; X64-AVX1-i64:       # %bb.0:
+; X64-AVX1-i64-NEXT:    subq $72, %rsp
+; X64-AVX1-i64-NEXT:    .cfi_def_cfa_offset 80
+; X64-AVX1-i64-NEXT:    vmovaps %xmm2, (%rsp) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps %xmm3, %xmm0
+; X64-AVX1-i64-NEXT:    callq lrintl at PLT
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT:    callq lrintl at PLT
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-i64-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT:    callq lrintl at PLT
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT:    callq lrintl at PLT
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-i64-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT:    addq $72, %rsp
+; X64-AVX1-i64-NEXT:    .cfi_def_cfa_offset 8
+; X64-AVX1-i64-NEXT:    retq
+;
+; AVX512-i64-LABEL: lrint_v4fp128:
+; AVX512-i64:       # %bb.0:
+; AVX512-i64-NEXT:    subq $72, %rsp
+; AVX512-i64-NEXT:    .cfi_def_cfa_offset 80
+; AVX512-i64-NEXT:    vmovaps %xmm2, (%rsp) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps %xmm3, %xmm0
+; AVX512-i64-NEXT:    callq lrintl at PLT
+; AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT:    callq lrintl at PLT
+; AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512-i64-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT:    callq lrintl at PLT
+; AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT:    callq lrintl at PLT
+; AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512-i64-NEXT:    vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT:    addq $72, %rsp
+; AVX512-i64-NEXT:    .cfi_def_cfa_offset 8
+; AVX512-i64-NEXT:    retq
+;
+; AVX512DQ-i64-LABEL: lrint_v4fp128:
+; AVX512DQ-i64:       # %bb.0:
+; AVX512DQ-i64-NEXT:    subq $72, %rsp
+; AVX512DQ-i64-NEXT:    .cfi_def_cfa_offset 80
+; AVX512DQ-i64-NEXT:    vmovaps %xmm2, (%rsp) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps %xmm3, %xmm0
+; AVX512DQ-i64-NEXT:    callq lrintl at PLT
+; AVX512DQ-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT:    callq lrintl at PLT
+; AVX512DQ-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-i64-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT:    callq lrintl at PLT
+; AVX512DQ-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT:    callq lrintl at PLT
+; AVX512DQ-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-i64-NEXT:    vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT:    addq $72, %rsp
+; AVX512DQ-i64-NEXT:    .cfi_def_cfa_offset 8
+; AVX512DQ-i64-NEXT:    retq
+  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x)
+  ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>)
+
+define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
+; X86-I32-LABEL: lrint_v8fp128:
+; X86-I32:       # %bb.0:
+; X86-I32-NEXT:    pushl %ebp
+; X86-I32-NEXT:    .cfi_def_cfa_offset 8
+; X86-I32-NEXT:    .cfi_offset %ebp, -8
+; X86-I32-NEXT:    movl %esp, %ebp
+; X86-I32-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I32-NEXT:    pushl %ebx
+; X86-I32-NEXT:    pushl %edi
+; X86-I32-NEXT:    pushl %esi
+; X86-I32-NEXT:    andl $-16, %esp
+; X86-I32-NEXT:    subl $32, %esp
+; X86-I32-NEXT:    .cfi_offset %esi, -20
+; X86-I32-NEXT:    .cfi_offset %edi, -16
+; X86-I32-NEXT:    .cfi_offset %ebx, -12
+; X86-I32-NEXT:    movl 8(%ebp), %esi
+; X86-I32-NEXT:    movl 36(%ebp), %ebx
+; X86-I32-NEXT:    movl 40(%ebp), %edi
+; X86-I32-NEXT:    pushl 24(%ebp)
+; X86-I32-NEXT:    pushl 20(%ebp)
+; X86-I32-NEXT:    pushl 16(%ebp)
+; X86-I32-NEXT:    pushl 12(%ebp)
+; X86-I32-NEXT:    calll lrintl
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT:    pushl %edi
+; X86-I32-NEXT:    pushl %ebx
+; X86-I32-NEXT:    pushl 32(%ebp)
+; X86-I32-NEXT:    pushl 28(%ebp)
+; X86-I32-NEXT:    calll lrintl
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT:    pushl 56(%ebp)
+; X86-I32-NEXT:    pushl 52(%ebp)
+; X86-I32-NEXT:    pushl 48(%ebp)
+; X86-I32-NEXT:    pushl 44(%ebp)
+; X86-I32-NEXT:    calll lrintl
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT:    pushl 72(%ebp)
+; X86-I32-NEXT:    pushl 68(%ebp)
+; X86-I32-NEXT:    pushl 64(%ebp)
+; X86-I32-NEXT:    pushl 60(%ebp)
+; X86-I32-NEXT:    calll lrintl
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT:    pushl 88(%ebp)
+; X86-I32-NEXT:    pushl 84(%ebp)
+; X86-I32-NEXT:    pushl 80(%ebp)
+; X86-I32-NEXT:    pushl 76(%ebp)
+; X86-I32-NEXT:    calll lrintl
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I32-NEXT:    pushl 104(%ebp)
+; X86-I32-NEXT:    pushl 100(%ebp)
+; X86-I32-NEXT:    pushl 96(%ebp)
+; X86-I32-NEXT:    pushl 92(%ebp)
+; X86-I32-NEXT:    calll lrintl
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    movl %eax, %ebx
+; X86-I32-NEXT:    pushl 120(%ebp)
+; X86-I32-NEXT:    pushl 116(%ebp)
+; X86-I32-NEXT:    pushl 112(%ebp)
+; X86-I32-NEXT:    pushl 108(%ebp)
+; X86-I32-NEXT:    calll lrintl
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    movl %eax, %edi
+; X86-I32-NEXT:    pushl 136(%ebp)
+; X86-I32-NEXT:    pushl 132(%ebp)
+; X86-I32-NEXT:    pushl 128(%ebp)
+; X86-I32-NEXT:    pushl 124(%ebp)
+; X86-I32-NEXT:    calll lrintl
+; X86-I32-NEXT:    addl $16, %esp
+; X86-I32-NEXT:    movl %eax, 28(%esi)
+; X86-I32-NEXT:    movl %edi, 24(%esi)
+; X86-I32-NEXT:    movl %ebx, 20(%esi)
+; X86-I32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I32-NEXT:    movl %eax, 16(%esi)
+; X86-I32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I32-NEXT:    movl %eax, 12(%esi)
+; X86-I32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I32-NEXT:    movl %eax, 8(%esi)
+; X86-I32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I32-NEXT:    movl %eax, 4(%esi)
+; X86-I32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I32-NEXT:    movl %eax, (%esi)
+; X86-I32-NEXT:    movl %esi, %eax
+; X86-I32-NEXT:    leal -12(%ebp), %esp
+; X86-I32-NEXT:    popl %esi
+; X86-I32-NEXT:    popl %edi
+; X86-I32-NEXT:    popl %ebx
+; X86-I32-NEXT:    popl %ebp
+; X86-I32-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I32-NEXT:    retl $4
+;
+; X86-I64-LABEL: lrint_v8fp128:
+; X86-I64:       # %bb.0:
+; X86-I64-NEXT:    pushl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_offset 8
+; X86-I64-NEXT:    .cfi_offset %ebp, -8
+; X86-I64-NEXT:    movl %esp, %ebp
+; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
+; X86-I64-NEXT:    pushl %ebx
+; X86-I64-NEXT:    pushl %edi
+; X86-I64-NEXT:    pushl %esi
+; X86-I64-NEXT:    andl $-16, %esp
+; X86-I64-NEXT:    subl $64, %esp
+; X86-I64-NEXT:    .cfi_offset %esi, -20
+; X86-I64-NEXT:    .cfi_offset %edi, -16
+; X86-I64-NEXT:    .cfi_offset %ebx, -12
+; X86-I64-NEXT:    movl 8(%ebp), %esi
+; X86-I64-NEXT:    movl 36(%ebp), %edi
+; X86-I64-NEXT:    movl 40(%ebp), %ebx
+; X86-I64-NEXT:    pushl 24(%ebp)
+; X86-I64-NEXT:    pushl 20(%ebp)
+; X86-I64-NEXT:    pushl 16(%ebp)
+; X86-I64-NEXT:    pushl 12(%ebp)
+; X86-I64-NEXT:    calll lrintl
+; X86-I64-NEXT:    addl $16, %esp
+; X86-I64-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    pushl %ebx
+; X86-I64-NEXT:    pushl %edi
+; X86-I64-NEXT:    pushl 32(%ebp)
+; X86-I64-NEXT:    pushl 28(%ebp)
+; X86-I64-NEXT:    calll lrintl
+; X86-I64-NEXT:    addl $16, %esp
+; X86-I64-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    pushl 56(%ebp)
+; X86-I64-NEXT:    pushl 52(%ebp)
+; X86-I64-NEXT:    pushl 48(%ebp)
+; X86-I64-NEXT:    pushl 44(%ebp)
+; X86-I64-NEXT:    calll lrintl
+; X86-I64-NEXT:    addl $16, %esp
+; X86-I64-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    pushl 72(%ebp)
+; X86-I64-NEXT:    pushl 68(%ebp)
+; X86-I64-NEXT:    pushl 64(%ebp)
+; X86-I64-NEXT:    pushl 60(%ebp)
+; X86-I64-NEXT:    calll lrintl
+; X86-I64-NEXT:    addl $16, %esp
+; X86-I64-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    pushl 88(%ebp)
+; X86-I64-NEXT:    pushl 84(%ebp)
+; X86-I64-NEXT:    pushl 80(%ebp)
+; X86-I64-NEXT:    pushl 76(%ebp)
+; X86-I64-NEXT:    calll lrintl
+; X86-I64-NEXT:    addl $16, %esp
+; X86-I64-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    pushl 104(%ebp)
+; X86-I64-NEXT:    pushl 100(%ebp)
+; X86-I64-NEXT:    pushl 96(%ebp)
+; X86-I64-NEXT:    pushl 92(%ebp)
+; X86-I64-NEXT:    calll lrintl
+; X86-I64-NEXT:    addl $16, %esp
+; X86-I64-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-I64-NEXT:    pushl 120(%ebp)
+; X86-I64-NEXT:    pushl 116(%ebp)
+; X86-I64-NEXT:    pushl 112(%ebp)
+; X86-I64-NEXT:    pushl 108(%ebp)
+; X86-I64-NEXT:    calll lrintl
+; X86-I64-NEXT:    addl $16, %esp
+; X86-I64-NEXT:    movl %eax, %edi
+; X86-I64-NEXT:    movl %edx, %ebx
+; X86-I64-NEXT:    pushl 136(%ebp)
+; X86-I64-NEXT:    pushl 132(%ebp)
+; X86-I64-NEXT:    pushl 128(%ebp)
+; X86-I64-NEXT:    pushl 124(%ebp)
+; X86-I64-NEXT:    calll lrintl
+; X86-I64-NEXT:    addl $16, %esp
+; X86-I64-NEXT:    movl %edx, 60(%esi)
+; X86-I64-NEXT:    movl %eax, 56(%esi)
+; X86-I64-NEXT:    movl %ebx, 52(%esi)
+; X86-I64-NEXT:    movl %edi, 48(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, 44(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, 40(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, 36(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, 32(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, 28(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, 24(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, 20(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, 16(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, 12(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, 8(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, 4(%esi)
+; X86-I64-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-I64-NEXT:    movl %eax, (%esi)
+; X86-I64-NEXT:    movl %esi, %eax
+; X86-I64-NEXT:    leal -12(%ebp), %esp
+; X86-I64-NEXT:    popl %esi
+; X86-I64-NEXT:    popl %edi
+; X86-I64-NEXT:    popl %ebx
+; X86-I64-NEXT:    popl %ebp
+; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
+; X86-I64-NEXT:    retl $4
+;
+; X86-SSE2-LABEL: lrint_v8fp128:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pushl %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
+; X86-SSE2-NEXT:    movl %esp, %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
+; X86-SSE2-NEXT:    pushl %ebx
+; X86-SSE2-NEXT:    pushl %edi
+; X86-SSE2-NEXT:    pushl %esi
+; X86-SSE2-NEXT:    andl $-16, %esp
+; X86-SSE2-NEXT:    subl $64, %esp
+; X86-SSE2-NEXT:    .cfi_offset %esi, -20
+; X86-SSE2-NEXT:    .cfi_offset %edi, -16
+; X86-SSE2-NEXT:    .cfi_offset %ebx, -12
+; X86-SSE2-NEXT:    movl 108(%ebp), %esi
+; X86-SSE2-NEXT:    movl 112(%ebp), %edi
+; X86-SSE2-NEXT:    movl 116(%ebp), %ebx
+; X86-SSE2-NEXT:    pushl 100(%ebp)
+; X86-SSE2-NEXT:    pushl 96(%ebp)
+; X86-SSE2-NEXT:    pushl 92(%ebp)
+; X86-SSE2-NEXT:    pushl 88(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE2-NEXT:    pushl %ebx
+; X86-SSE2-NEXT:    pushl %edi
+; X86-SSE2-NEXT:    pushl %esi
+; X86-SSE2-NEXT:    pushl 104(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SSE2-NEXT:    pushl 132(%ebp)
+; X86-SSE2-NEXT:    pushl 128(%ebp)
+; X86-SSE2-NEXT:    pushl 124(%ebp)
+; X86-SSE2-NEXT:    pushl 120(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-SSE2-NEXT:    pushl 20(%ebp)
+; X86-SSE2-NEXT:    pushl 16(%ebp)
+; X86-SSE2-NEXT:    pushl 12(%ebp)
+; X86-SSE2-NEXT:    pushl 8(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movl %eax, %esi
+; X86-SSE2-NEXT:    pushl 36(%ebp)
+; X86-SSE2-NEXT:    pushl 32(%ebp)
+; X86-SSE2-NEXT:    pushl 28(%ebp)
+; X86-SSE2-NEXT:    pushl 24(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movl %eax, %edi
+; X86-SSE2-NEXT:    pushl 52(%ebp)
+; X86-SSE2-NEXT:    pushl 48(%ebp)
+; X86-SSE2-NEXT:    pushl 44(%ebp)
+; X86-SSE2-NEXT:    pushl 40(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movl %eax, %ebx
+; X86-SSE2-NEXT:    pushl 68(%ebp)
+; X86-SSE2-NEXT:    pushl 64(%ebp)
+; X86-SSE2-NEXT:    pushl 60(%ebp)
+; X86-SSE2-NEXT:    pushl 56(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movd %eax, %xmm0
+; X86-SSE2-NEXT:    movd %ebx, %xmm1
+; X86-SSE2-NEXT:    movd %edi, %xmm2
+; X86-SSE2-NEXT:    movd %esi, %xmm4
+; X86-SSE2-NEXT:    movss (%esp), %xmm3 # 4-byte Reload
+; X86-SSE2-NEXT:    # xmm3 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 4-byte Reload
+; X86-SSE2-NEXT:    # xmm5 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 4-byte Reload
+; X86-SSE2-NEXT:    # xmm6 = mem[0],zero,zero,zero
+; X86-SSE2-NEXT:    movaps %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
+; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm1[0]
+; X86-SSE2-NEXT:    movdqa %xmm4, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-SSE2-NEXT:    unpcklps {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; X86-SSE2-NEXT:    movaps %xmm5, (%esp) # 16-byte Spill
+; X86-SSE2-NEXT:    pushl 84(%ebp)
+; X86-SSE2-NEXT:    pushl 80(%ebp)
+; X86-SSE2-NEXT:    pushl 76(%ebp)
+; X86-SSE2-NEXT:    pushl 72(%ebp)
+; X86-SSE2-NEXT:    calll lrintl
+; X86-SSE2-NEXT:    addl $16, %esp
+; X86-SSE2-NEXT:    movd %eax, %xmm1
+; X86-SSE2-NEXT:    punpckldq {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; X86-SSE2-NEXT:    # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; X86-SSE2-NEXT:    punpcklqdq (%esp), %xmm1 # 16-byte Folded Reload
+; X86-SSE2-NEXT:    # xmm1 = xmm1[0],mem[0]
+; X86-SSE2-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-SSE2-NEXT:    leal -12(%ebp), %esp
+; X86-SSE2-NEXT:    popl %esi
+; X86-SSE2-NEXT:    popl %edi
+; X86-SSE2-NEXT:    popl %ebx
+; X86-SSE2-NEXT:    popl %ebp
+; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX1-LABEL: lrint_v8fp128:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    pushl %ebp
+; X86-AVX1-NEXT:    .cfi_def_cfa_offset 8
+; X86-AVX1-NEXT:    .cfi_offset %ebp, -8
+; X86-AVX1-NEXT:    movl %esp, %ebp
+; X86-AVX1-NEXT:    .cfi_def_cfa_register %ebp
+; X86-AVX1-NEXT:    pushl %ebx
+; X86-AVX1-NEXT:    pushl %edi
+; X86-AVX1-NEXT:    pushl %esi
+; X86-AVX1-NEXT:    andl $-16, %esp
+; X86-AVX1-NEXT:    subl $80, %esp
+; X86-AVX1-NEXT:    .cfi_offset %esi, -20
+; X86-AVX1-NEXT:    .cfi_offset %edi, -16
+; X86-AVX1-NEXT:    .cfi_offset %ebx, -12
+; X86-AVX1-NEXT:    vmovups 40(%ebp), %xmm0
+; X86-AVX1-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX1-NEXT:    calll lrintl
+; X86-AVX1-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-AVX1-NEXT:    vmovups 24(%ebp), %xmm0
+; X86-AVX1-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX1-NEXT:    calll lrintl
+; X86-AVX1-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-AVX1-NEXT:    vmovups 8(%ebp), %xmm0
+; X86-AVX1-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX1-NEXT:    calll lrintl
+; X86-AVX1-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-AVX1-NEXT:    vmovups 120(%ebp), %xmm0
+; X86-AVX1-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX1-NEXT:    calll lrintl
+; X86-AVX1-NEXT:    movl %eax, %esi
+; X86-AVX1-NEXT:    vmovups 104(%ebp), %xmm0
+; X86-AVX1-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX1-NEXT:    calll lrintl
+; X86-AVX1-NEXT:    movl %eax, %edi
+; X86-AVX1-NEXT:    vmovups 88(%ebp), %xmm0
+; X86-AVX1-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX1-NEXT:    calll lrintl
+; X86-AVX1-NEXT:    movl %eax, %ebx
+; X86-AVX1-NEXT:    vmovups 72(%ebp), %xmm0
+; X86-AVX1-NEXT:    vmovups %xmm0, (%esp)
+; X86-AVX1-NEXT:    calll lrintl
+; X86-AVX1-NEXT:    vmovd %eax, %xmm0
+; X86-AVX1-NEXT:    vpinsrd $1, %ebx, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpinsrd $2, %edi, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpinsrd $3, %esi, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT:    vmovd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Folded Reload
+; X86-AVX1-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; X86-AVX1-NEXT:    vpinsrd $1, {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; X86-AVX1-NEXT:    vmovups 56(%ebp), %xmm1
+; X86-AVX1-NEXT:    vmovups %xmm1, (%esp)
+; X86-AVX1-NEXT:    vpinsrd $2, {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
+; X86-AVX1-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-AVX1-NEXT:    calll lrintl
+; X86-AVX1-NEXT:    vmovdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-AVX1-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X86-AVX1-NEXT:    leal -12(%ebp), %esp
+; X86-AVX1-NEXT:    popl %esi
+; X86-AVX1-NEXT:    popl %edi
+; X86-AVX1-NEXT:    popl %ebx
+; X86-AVX1-NEXT:    popl %ebp
+; X86-AVX1-NEXT:    .cfi_def_cfa %esp, 4
+; X86-AVX1-NEXT:    retl
+;
+; X64-AVX1-i32-LABEL: lrint_v8fp128:
+; X64-AVX1-i32:       # %bb.0:
+; X64-AVX1-i32-NEXT:    pushq %rbx
+; X64-AVX1-i32-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX1-i32-NEXT:    subq $112, %rsp
+; X64-AVX1-i32-NEXT:    .cfi_def_cfa_offset 128
+; X64-AVX1-i32-NEXT:    .cfi_offset %rbx, -16
+; X64-AVX1-i32-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT:    vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT:    vmovaps %xmm4, (%rsp) # 16-byte Spill
+; X64-AVX1-i32-NEXT:    vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT:    vmovaps %xmm5, %xmm0
+; X64-AVX1-i32-NEXT:    callq lrintl at PLT
+; X64-AVX1-i32-NEXT:    movl %eax, %ebx
+; X64-AVX1-i32-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT:    callq lrintl at PLT
+; X64-AVX1-i32-NEXT:    vmovd %eax, %xmm0
+; X64-AVX1-i32-NEXT:    vpinsrd $1, %ebx, %xmm0, %xmm0
+; X64-AVX1-i32-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-i32-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT:    callq lrintl at PLT
+; X64-AVX1-i32-NEXT:    vmovdqa (%rsp), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
+; X64-AVX1-i32-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT:    callq lrintl at PLT
+; X64-AVX1-i32-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
+; X64-AVX1-i32-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT:    callq lrintl at PLT
+; X64-AVX1-i32-NEXT:    movl %eax, %ebx
+; X64-AVX1-i32-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT:    callq lrintl at PLT
+; X64-AVX1-i32-NEXT:    vmovd %eax, %xmm0
+; X64-AVX1-i32-NEXT:    vpinsrd $1, %ebx, %xmm0, %xmm0
+; X64-AVX1-i32-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT:    callq lrintl at PLT
+; X64-AVX1-i32-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
+; X64-AVX1-i32-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i32-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT:    callq lrintl at PLT
+; X64-AVX1-i32-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
+; X64-AVX1-i32-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX1-i32-NEXT:    addq $112, %rsp
+; X64-AVX1-i32-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX1-i32-NEXT:    popq %rbx
+; X64-AVX1-i32-NEXT:    .cfi_def_cfa_offset 8
+; X64-AVX1-i32-NEXT:    retq
+;
+; X64-AVX1-i64-LABEL: lrint_v8fp128:
+; X64-AVX1-i64:       # %bb.0:
+; X64-AVX1-i64-NEXT:    subq $152, %rsp
+; X64-AVX1-i64-NEXT:    .cfi_def_cfa_offset 160
+; X64-AVX1-i64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps %xmm2, (%rsp) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps %xmm3, %xmm0
+; X64-AVX1-i64-NEXT:    callq lrintl at PLT
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT:    callq lrintl at PLT
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-i64-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT:    callq lrintl at PLT
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT:    callq lrintl at PLT
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-i64-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT:    vzeroupper
+; X64-AVX1-i64-NEXT:    callq lrintl at PLT
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT:    callq lrintl at PLT
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT:    callq lrintl at PLT
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-AVX1-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-AVX1-i64-NEXT:    callq lrintl at PLT
+; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
+; X64-AVX1-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; X64-AVX1-i64-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm1 # 16-byte Folded Reload
+; X64-AVX1-i64-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; X64-AVX1-i64-NEXT:    addq $152, %rsp
+; X64-AVX1-i64-NEXT:    .cfi_def_cfa_offset 8
+; X64-AVX1-i64-NEXT:    retq
+;
+; AVX512-i64-LABEL: lrint_v8fp128:
+; AVX512-i64:       # %bb.0:
+; AVX512-i64-NEXT:    subq $152, %rsp
+; AVX512-i64-NEXT:    .cfi_def_cfa_offset 160
+; AVX512-i64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps %xmm7, %xmm0
+; AVX512-i64-NEXT:    callq lrintl at PLT
+; AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT:    callq lrintl at PLT
+; AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512-i64-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT:    callq lrintl at PLT
+; AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT:    callq lrintl at PLT
+; AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512-i64-NEXT:    vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT:    vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT:    vzeroupper
+; AVX512-i64-NEXT:    callq lrintl at PLT
+; AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT:    callq lrintl at PLT
+; AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT:    callq lrintl at PLT
+; AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512-i64-NEXT:    callq lrintl at PLT
+; AVX512-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512-i64-NEXT:    vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512-i64-NEXT:    vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; AVX512-i64-NEXT:    addq $152, %rsp
+; AVX512-i64-NEXT:    .cfi_def_cfa_offset 8
+; AVX512-i64-NEXT:    retq
+;
+; AVX512DQ-i64-LABEL: lrint_v8fp128:
+; AVX512DQ-i64:       # %bb.0:
+; AVX512DQ-i64-NEXT:    subq $152, %rsp
+; AVX512DQ-i64-NEXT:    .cfi_def_cfa_offset 160
+; AVX512DQ-i64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps %xmm7, %xmm0
+; AVX512DQ-i64-NEXT:    callq lrintl at PLT
+; AVX512DQ-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT:    callq lrintl at PLT
+; AVX512DQ-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-i64-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT:    callq lrintl at PLT
+; AVX512DQ-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT:    callq lrintl at PLT
+; AVX512DQ-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-i64-NEXT:    vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT:    vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT:    vzeroupper
+; AVX512DQ-i64-NEXT:    callq lrintl at PLT
+; AVX512DQ-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT:    callq lrintl at PLT
+; AVX512DQ-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT:    callq lrintl at PLT
+; AVX512DQ-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX512DQ-i64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; AVX512DQ-i64-NEXT:    callq lrintl at PLT
+; AVX512DQ-i64-NEXT:    vmovq %rax, %xmm0
+; AVX512DQ-i64-NEXT:    vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
+; AVX512DQ-i64-NEXT:    vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
+; AVX512DQ-i64-NEXT:    vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
+; AVX512DQ-i64-NEXT:    addq $152, %rsp
+; AVX512DQ-i64-NEXT:    .cfi_def_cfa_offset 8
+; AVX512DQ-i64-NEXT:    retq
+  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x)
+  ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>)

>From 8ace6b7ff788c2bb232744d75b7dfd933735eee7 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 8 Aug 2025 07:01:31 -0500
Subject: [PATCH 4/7] Add a vector test to arm

---
 llvm/test/CodeGen/ARM/vector-llrint.ll | 11126 +++++++++++++++++++
 llvm/test/CodeGen/ARM/vector-lrint.ll  | 13251 +++++++++++++++++++++++
 2 files changed, 24377 insertions(+)
 create mode 100644 llvm/test/CodeGen/ARM/vector-llrint.ll
 create mode 100644 llvm/test/CodeGen/ARM/vector-lrint.ll

diff --git a/llvm/test/CodeGen/ARM/vector-llrint.ll b/llvm/test/CodeGen/ARM/vector-llrint.ll
new file mode 100644
index 0000000000000..870947fac063e
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/vector-llrint.ll
@@ -0,0 +1,11126 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf                | FileCheck %s --check-prefix=LE
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf                | FileCheck %s --check-prefix=LE
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon   | FileCheck %s --check-prefix=LE-NEON
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon   | FileCheck %s --check-prefix=LE-NEON
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf              | FileCheck %s --check-prefix=BE
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf              | FileCheck %s --check-prefix=BE
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-NEON
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-NEON
+
+define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
+; LE-LABEL: llrint_v1i64_v1f16:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r11, lr}
+; LE-NEXT:    push {r11, lr}
+; LE-NEXT:    vmov r0, s0
+; LE-NEXT:    bl __aeabi_f2h
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d0[0], r0
+; LE-NEXT:    vmov.32 d0[1], r1
+; LE-NEXT:    pop {r11, pc}
+;
+; LE-NEON-LABEL: llrint_v1i64_v1f16:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r11, lr}
+; LE-NEON-NEXT:    push {r11, lr}
+; LE-NEON-NEXT:    vmov r0, s0
+; LE-NEON-NEXT:    bl __aeabi_f2h
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d0[0], r0
+; LE-NEON-NEXT:    vmov.32 d0[1], r1
+; LE-NEON-NEXT:    pop {r11, pc}
+;
+; BE-LABEL: llrint_v1i64_v1f16:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r11, lr}
+; BE-NEXT:    push {r11, lr}
+; BE-NEXT:    vmov r0, s0
+; BE-NEXT:    bl __aeabi_f2h
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 d0, d16
+; BE-NEXT:    pop {r11, pc}
+;
+; BE-NEON-LABEL: llrint_v1i64_v1f16:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r11, lr}
+; BE-NEON-NEXT:    push {r11, lr}
+; BE-NEON-NEXT:    vmov r0, s0
+; BE-NEON-NEXT:    bl __aeabi_f2h
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-NEON-NEXT:    vrev64.32 d0, d16
+; BE-NEON-NEXT:    pop {r11, pc}
+  %a = call <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half> %x)
+  ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
+
+define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
+; LE-LABEL: llrint_v1i64_v2f16:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r11, lr}
+; LE-NEXT:    push {r4, r5, r11, lr}
+; LE-NEXT:    .vsave {d8, d9}
+; LE-NEXT:    vpush {d8, d9}
+; LE-NEXT:    vmov r0, s1
+; LE-NEXT:    vmov.f32 s16, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    mov r4, r0
+; LE-NEXT:    vmov r0, s16
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    vmov.32 d9[0], r4
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    vmov.32 d9[1], r5
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vorr q0, q4, q4
+; LE-NEXT:    vpop {d8, d9}
+; LE-NEXT:    pop {r4, r5, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v1i64_v2f16:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r11, lr}
+; LE-NEON-NEXT:    push {r4, r5, r11, lr}
+; LE-NEON-NEXT:    .vsave {d8, d9}
+; LE-NEON-NEXT:    vpush {d8, d9}
+; LE-NEON-NEXT:    vmov r0, s1
+; LE-NEON-NEXT:    vmov.f32 s16, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    mov r4, r0
+; LE-NEON-NEXT:    vmov r0, s16
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    vmov.32 d9[0], r4
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    vmov.32 d9[1], r5
+; LE-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-NEON-NEXT:    vorr q0, q4, q4
+; LE-NEON-NEXT:    vpop {d8, d9}
+; LE-NEON-NEXT:    pop {r4, r5, r11, pc}
+;
+; BE-LABEL: llrint_v1i64_v2f16:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r11, lr}
+; BE-NEXT:    push {r4, r5, r11, lr}
+; BE-NEXT:    .vsave {d8}
+; BE-NEXT:    vpush {d8}
+; BE-NEXT:    vmov r0, s1
+; BE-NEXT:    vmov.f32 s16, s0
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    mov r4, r0
+; BE-NEXT:    vmov r0, s16
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    vmov.32 d8[0], r4
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov.32 d8[1], r5
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 d1, d8
+; BE-NEXT:    vrev64.32 d0, d16
+; BE-NEXT:    vpop {d8}
+; BE-NEXT:    pop {r4, r5, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v1i64_v2f16:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r11, lr}
+; BE-NEON-NEXT:    push {r4, r5, r11, lr}
+; BE-NEON-NEXT:    .vsave {d8}
+; BE-NEON-NEXT:    vpush {d8}
+; BE-NEON-NEXT:    vmov r0, s1
+; BE-NEON-NEXT:    vmov.f32 s16, s0
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    mov r4, r0
+; BE-NEON-NEXT:    vmov r0, s16
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    vmov.32 d8[0], r4
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov.32 d8[1], r5
+; BE-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-NEON-NEXT:    vrev64.32 d1, d8
+; BE-NEON-NEXT:    vrev64.32 d0, d16
+; BE-NEON-NEXT:    vpop {d8}
+; BE-NEON-NEXT:    pop {r4, r5, r11, pc}
+  %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x)
+  ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>)
+
+define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
+; LE-LABEL: llrint_v4i64_v4f16:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r11, lr}
+; LE-NEXT:    .vsave {d12, d13}
+; LE-NEXT:    vpush {d12, d13}
+; LE-NEXT:    .vsave {d8, d9, d10}
+; LE-NEXT:    vpush {d8, d9, d10}
+; LE-NEXT:    vmov r0, s1
+; LE-NEXT:    vmov.f32 s16, s3
+; LE-NEXT:    vmov.f32 s20, s2
+; LE-NEXT:    vmov.f32 s18, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    mov r5, r0
+; LE-NEXT:    vmov r0, s18
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    mov r7, r0
+; LE-NEXT:    vmov r0, s16
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r7
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    vmov r0, s20
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    vmov.32 d13[0], r5
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    vmov.32 d13[1], r4
+; LE-NEXT:    vmov.32 d9[1], r6
+; LE-NEXT:    vmov.32 d12[1], r7
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vorr q0, q6, q6
+; LE-NEXT:    vorr q1, q4, q4
+; LE-NEXT:    vpop {d8, d9, d10}
+; LE-NEXT:    vpop {d12, d13}
+; LE-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v4i64_v4f16:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r11, lr}
+; LE-NEON-NEXT:    .vsave {d12, d13}
+; LE-NEON-NEXT:    vpush {d12, d13}
+; LE-NEON-NEXT:    .vsave {d8, d9, d10}
+; LE-NEON-NEXT:    vpush {d8, d9, d10}
+; LE-NEON-NEXT:    vmov r0, s1
+; LE-NEON-NEXT:    vmov.f32 s16, s3
+; LE-NEON-NEXT:    vmov.f32 s20, s2
+; LE-NEON-NEXT:    vmov.f32 s18, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    mov r5, r0
+; LE-NEON-NEXT:    vmov r0, s18
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    mov r7, r0
+; LE-NEON-NEXT:    vmov r0, s16
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r7
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    vmov r0, s20
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    vmov.32 d13[0], r5
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    vmov.32 d13[1], r4
+; LE-NEON-NEXT:    vmov.32 d9[1], r6
+; LE-NEON-NEXT:    vmov.32 d12[1], r7
+; LE-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-NEON-NEXT:    vorr q0, q6, q6
+; LE-NEON-NEXT:    vorr q1, q4, q4
+; LE-NEON-NEXT:    vpop {d8, d9, d10}
+; LE-NEON-NEXT:    vpop {d12, d13}
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+;
+; BE-LABEL: llrint_v4i64_v4f16:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r11, lr}
+; BE-NEXT:    .vsave {d8, d9, d10}
+; BE-NEXT:    vpush {d8, d9, d10}
+; BE-NEXT:    vmov r0, s1
+; BE-NEXT:    vmov.f32 s16, s3
+; BE-NEXT:    vmov.f32 s18, s2
+; BE-NEXT:    vmov.f32 s20, s0
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    mov r5, r0
+; BE-NEXT:    vmov r0, s20
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    mov r7, r0
+; BE-NEXT:    vmov r0, s16
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov s0, r7
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    vmov r0, s18
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    vmov.32 d9[0], r5
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov.32 d9[1], r4
+; BE-NEXT:    vmov.32 d8[1], r6
+; BE-NEXT:    vmov.32 d10[1], r7
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 d1, d9
+; BE-NEXT:    vrev64.32 d3, d8
+; BE-NEXT:    vrev64.32 d0, d10
+; BE-NEXT:    vrev64.32 d2, d16
+; BE-NEXT:    vpop {d8, d9, d10}
+; BE-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v4i64_v4f16:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r11, lr}
+; BE-NEON-NEXT:    .vsave {d8, d9, d10}
+; BE-NEON-NEXT:    vpush {d8, d9, d10}
+; BE-NEON-NEXT:    vmov r0, s1
+; BE-NEON-NEXT:    vmov.f32 s16, s3
+; BE-NEON-NEXT:    vmov.f32 s18, s2
+; BE-NEON-NEXT:    vmov.f32 s20, s0
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    mov r5, r0
+; BE-NEON-NEXT:    vmov r0, s20
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    mov r7, r0
+; BE-NEON-NEXT:    vmov r0, s16
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov s0, r7
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    vmov r0, s18
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    vmov.32 d9[0], r5
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-NEON-NEXT:    vmov.32 d8[1], r6
+; BE-NEON-NEXT:    vmov.32 d10[1], r7
+; BE-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-NEON-NEXT:    vrev64.32 d1, d9
+; BE-NEON-NEXT:    vrev64.32 d3, d8
+; BE-NEON-NEXT:    vrev64.32 d0, d10
+; BE-NEON-NEXT:    vrev64.32 d2, d16
+; BE-NEON-NEXT:    vpop {d8, d9, d10}
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+  %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x)
+  ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>)
+
+define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
+; LE-LABEL: llrint_v8i64_v8f16:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    .pad #4
+; LE-NEXT:    sub sp, sp, #4
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    .pad #8
+; LE-NEXT:    sub sp, sp, #8
+; LE-NEXT:    vmov r0, s1
+; LE-NEXT:    vstr s6, [sp, #4] @ 4-byte Spill
+; LE-NEXT:    vmov.f32 s16, s7
+; LE-NEXT:    vmov.f32 s18, s5
+; LE-NEXT:    vmov.f32 s20, s4
+; LE-NEXT:    vmov.f32 s22, s3
+; LE-NEXT:    vmov.f32 s24, s2
+; LE-NEXT:    vmov.f32 s26, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    mov r9, r0
+; LE-NEXT:    vmov r0, s26
+; LE-NEXT:    str r1, [sp] @ 4-byte Spill
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    mov r10, r0
+; LE-NEXT:    vmov r0, s22
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    mov r5, r0
+; LE-NEXT:    vmov r0, s24
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    mov r7, r0
+; LE-NEXT:    vmov r0, s18
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    mov r6, r0
+; LE-NEXT:    vmov r0, s20
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    mov r4, r0
+; LE-NEXT:    vmov r0, s16
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r4
+; LE-NEXT:    mov r11, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r6
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r7
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r5
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r10
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vldr s0, [sp, #4] @ 4-byte Reload
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    vmov r0, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    vmov.32 d9[0], r9
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d15[1], r5
+; LE-NEXT:    vmov.32 d9[1], r0
+; LE-NEXT:    vmov.32 d13[1], r6
+; LE-NEXT:    vmov.32 d11[1], r11
+; LE-NEXT:    vmov.32 d8[1], r4
+; LE-NEXT:    vmov.32 d14[1], r7
+; LE-NEXT:    vorr q0, q4, q4
+; LE-NEXT:    vmov.32 d12[1], r8
+; LE-NEXT:    vorr q1, q7, q7
+; LE-NEXT:    vmov.32 d10[1], r1
+; LE-NEXT:    vorr q2, q6, q6
+; LE-NEXT:    vorr q3, q5, q5
+; LE-NEXT:    add sp, sp, #8
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    add sp, sp, #4
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v8i64_v8f16:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    .pad #4
+; LE-NEON-NEXT:    sub sp, sp, #4
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    .pad #8
+; LE-NEON-NEXT:    sub sp, sp, #8
+; LE-NEON-NEXT:    vmov r0, s1
+; LE-NEON-NEXT:    vstr s6, [sp, #4] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.f32 s16, s7
+; LE-NEON-NEXT:    vmov.f32 s18, s5
+; LE-NEON-NEXT:    vmov.f32 s20, s4
+; LE-NEON-NEXT:    vmov.f32 s22, s3
+; LE-NEON-NEXT:    vmov.f32 s24, s2
+; LE-NEON-NEXT:    vmov.f32 s26, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    mov r9, r0
+; LE-NEON-NEXT:    vmov r0, s26
+; LE-NEON-NEXT:    str r1, [sp] @ 4-byte Spill
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    mov r10, r0
+; LE-NEON-NEXT:    vmov r0, s22
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    mov r5, r0
+; LE-NEON-NEXT:    vmov r0, s24
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    mov r7, r0
+; LE-NEON-NEXT:    vmov r0, s18
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    mov r6, r0
+; LE-NEON-NEXT:    vmov r0, s20
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    mov r4, r0
+; LE-NEON-NEXT:    vmov r0, s16
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r4
+; LE-NEON-NEXT:    mov r11, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r6
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r7
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r5
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r10
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vldr s0, [sp, #4] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    vmov r0, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    vmov.32 d9[0], r9
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov.32 d15[1], r5
+; LE-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-NEON-NEXT:    vmov.32 d13[1], r6
+; LE-NEON-NEXT:    vmov.32 d11[1], r11
+; LE-NEON-NEXT:    vmov.32 d8[1], r4
+; LE-NEON-NEXT:    vmov.32 d14[1], r7
+; LE-NEON-NEXT:    vorr q0, q4, q4
+; LE-NEON-NEXT:    vmov.32 d12[1], r8
+; LE-NEON-NEXT:    vorr q1, q7, q7
+; LE-NEON-NEXT:    vmov.32 d10[1], r1
+; LE-NEON-NEXT:    vorr q2, q6, q6
+; LE-NEON-NEXT:    vorr q3, q5, q5
+; LE-NEON-NEXT:    add sp, sp, #8
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    add sp, sp, #4
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v8i64_v8f16:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    .pad #4
+; BE-NEXT:    sub sp, sp, #4
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEXT:    .pad #8
+; BE-NEXT:    sub sp, sp, #8
+; BE-NEXT:    vmov r0, s1
+; BE-NEXT:    vmov.f32 s18, s7
+; BE-NEXT:    vmov.f32 s16, s6
+; BE-NEXT:    vmov.f32 s20, s5
+; BE-NEXT:    vmov.f32 s22, s4
+; BE-NEXT:    vmov.f32 s24, s3
+; BE-NEXT:    vmov.f32 s26, s2
+; BE-NEXT:    vmov.f32 s28, s0
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    mov r9, r0
+; BE-NEXT:    vmov r0, s28
+; BE-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    mov r10, r0
+; BE-NEXT:    vmov r0, s24
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    mov r5, r0
+; BE-NEXT:    vmov r0, s26
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    mov r7, r0
+; BE-NEXT:    vmov r0, s20
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    mov r6, r0
+; BE-NEXT:    vmov r0, s22
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    mov r4, r0
+; BE-NEXT:    vmov r0, s18
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov s0, r4
+; BE-NEXT:    mov r11, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov s0, r6
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov s0, r7
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov s0, r5
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov s0, r10
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    vmov r0, s16
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    vmov.32 d8[0], r9
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; BE-NEXT:    vmov.32 d13[1], r5
+; BE-NEXT:    vmov.32 d8[1], r0
+; BE-NEXT:    vmov.32 d11[1], r6
+; BE-NEXT:    vmov.32 d9[1], r11
+; BE-NEXT:    vmov.32 d14[1], r4
+; BE-NEXT:    vmov.32 d12[1], r7
+; BE-NEXT:    vmov.32 d10[1], r8
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 d1, d8
+; BE-NEXT:    vrev64.32 d3, d13
+; BE-NEXT:    vrev64.32 d5, d11
+; BE-NEXT:    vrev64.32 d7, d9
+; BE-NEXT:    vrev64.32 d0, d14
+; BE-NEXT:    vrev64.32 d2, d12
+; BE-NEXT:    vrev64.32 d4, d10
+; BE-NEXT:    vrev64.32 d6, d16
+; BE-NEXT:    add sp, sp, #8
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEXT:    add sp, sp, #4
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v8i64_v8f16:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    .pad #4
+; BE-NEON-NEXT:    sub sp, sp, #4
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEON-NEXT:    .pad #8
+; BE-NEON-NEXT:    sub sp, sp, #8
+; BE-NEON-NEXT:    vmov r0, s1
+; BE-NEON-NEXT:    vmov.f32 s18, s7
+; BE-NEON-NEXT:    vmov.f32 s16, s6
+; BE-NEON-NEXT:    vmov.f32 s20, s5
+; BE-NEON-NEXT:    vmov.f32 s22, s4
+; BE-NEON-NEXT:    vmov.f32 s24, s3
+; BE-NEON-NEXT:    vmov.f32 s26, s2
+; BE-NEON-NEXT:    vmov.f32 s28, s0
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    mov r9, r0
+; BE-NEON-NEXT:    vmov r0, s28
+; BE-NEON-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    mov r10, r0
+; BE-NEON-NEXT:    vmov r0, s24
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    mov r5, r0
+; BE-NEON-NEXT:    vmov r0, s26
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    mov r7, r0
+; BE-NEON-NEXT:    vmov r0, s20
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    mov r6, r0
+; BE-NEON-NEXT:    vmov r0, s22
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    mov r4, r0
+; BE-NEON-NEXT:    vmov r0, s18
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov s0, r4
+; BE-NEON-NEXT:    mov r11, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov s0, r6
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov s0, r7
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov s0, r5
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov s0, r10
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    vmov r0, s16
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    vmov.32 d8[0], r9
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov.32 d13[1], r5
+; BE-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-NEON-NEXT:    vmov.32 d11[1], r6
+; BE-NEON-NEXT:    vmov.32 d9[1], r11
+; BE-NEON-NEXT:    vmov.32 d14[1], r4
+; BE-NEON-NEXT:    vmov.32 d12[1], r7
+; BE-NEON-NEXT:    vmov.32 d10[1], r8
+; BE-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-NEON-NEXT:    vrev64.32 d1, d8
+; BE-NEON-NEXT:    vrev64.32 d3, d13
+; BE-NEON-NEXT:    vrev64.32 d5, d11
+; BE-NEON-NEXT:    vrev64.32 d7, d9
+; BE-NEON-NEXT:    vrev64.32 d0, d14
+; BE-NEON-NEXT:    vrev64.32 d2, d12
+; BE-NEON-NEXT:    vrev64.32 d4, d10
+; BE-NEON-NEXT:    vrev64.32 d6, d16
+; BE-NEON-NEXT:    add sp, sp, #8
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEON-NEXT:    add sp, sp, #4
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x)
+  ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>)
+
+define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
+; LE-LABEL: llrint_v16i64_v16f16:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    .pad #4
+; LE-NEXT:    sub sp, sp, #4
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    .pad #120
+; LE-NEXT:    sub sp, sp, #120
+; LE-NEXT:    mov r11, r0
+; LE-NEXT:    vmov r0, s7
+; LE-NEXT:    vstr s15, [sp, #24] @ 4-byte Spill
+; LE-NEXT:    vmov.f32 s23, s13
+; LE-NEXT:    vstr s14, [sp, #100] @ 4-byte Spill
+; LE-NEXT:    vmov.f32 s25, s12
+; LE-NEXT:    vmov.f32 s27, s11
+; LE-NEXT:    vstr s10, [sp, #104] @ 4-byte Spill
+; LE-NEXT:    vstr s9, [sp, #108] @ 4-byte Spill
+; LE-NEXT:    vmov.f32 s24, s8
+; LE-NEXT:    vmov.f32 s19, s6
+; LE-NEXT:    vmov.f32 s29, s5
+; LE-NEXT:    vmov.f32 s17, s4
+; LE-NEXT:    vmov.f32 s16, s3
+; LE-NEXT:    vmov.f32 s21, s2
+; LE-NEXT:    vmov.f32 s26, s1
+; LE-NEXT:    vmov.f32 s18, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    mov r7, r0
+; LE-NEXT:    vmov r0, s25
+; LE-NEXT:    str r1, [sp, #56] @ 4-byte Spill
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    mov r5, r0
+; LE-NEXT:    vmov r0, s27
+; LE-NEXT:    str r1, [sp, #116] @ 4-byte Spill
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    mov r6, r0
+; LE-NEXT:    vmov r0, s29
+; LE-NEXT:    str r1, [sp, #112] @ 4-byte Spill
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    vmov r0, s23
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    add lr, sp, #80
+; LE-NEXT:    vmov.32 d17[0], r6
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    mov r6, r0
+; LE-NEXT:    vmov r0, s17
+; LE-NEXT:    vmov r8, s21
+; LE-NEXT:    str r1, [sp, #76] @ 4-byte Spill
+; LE-NEXT:    vmov r10, s19
+; LE-NEXT:    vmov.32 d10[0], r5
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    add lr, sp, #40
+; LE-NEXT:    vmov.32 d11[0], r6
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    mov r0, r10
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    vmov.32 d11[0], r7
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    mov r0, r8
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    mov r6, r0
+; LE-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d11[1], r0
+; LE-NEXT:    vmov r0, s18
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    mov r5, r0
+; LE-NEXT:    vmov r0, s16
+; LE-NEXT:    vmov.32 d10[1], r7
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    vmov.32 d15[1], r4
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    vmov r0, s26
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    vmov r8, s24
+; LE-NEXT:    vmov.32 d14[1], r9
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov s24, r5
+; LE-NEXT:    vldr s0, [sp, #24] @ 4-byte Reload
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    vmov r7, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov.f32 s0, s24
+; LE-NEXT:    vmov s22, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s22
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    vmov s24, r6
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    mov r0, r7
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov.f32 s0, s24
+; LE-NEXT:    vmov s22, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s22
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d15[1], r6
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    mov r0, r8
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vldr s0, [sp, #100] @ 4-byte Reload
+; LE-NEXT:    mov r7, r0
+; LE-NEXT:    vmov.32 d14[1], r5
+; LE-NEXT:    vmov r0, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vldr s0, [sp, #104] @ 4-byte Reload
+; LE-NEXT:    vmov s20, r0
+; LE-NEXT:    vmov.32 d13[1], r6
+; LE-NEXT:    vmov r4, s0
+; LE-NEXT:    vldr s0, [sp, #108] @ 4-byte Reload
+; LE-NEXT:    vmov r0, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov.f32 s0, s20
+; LE-NEXT:    vmov s16, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s16
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    vmov s18, r7
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    mov r0, r4
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov.f32 s0, s18
+; LE-NEXT:    vmov s16, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s16
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d11[1], r6
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #80
+; LE-NEXT:    vmov.32 d10[1], r4
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #40
+; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    vmov.32 d16[0], r0
+; LE-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; LE-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    vmov.32 d19[1], r0
+; LE-NEXT:    ldr r0, [sp, #116] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d21[1], r10
+; LE-NEXT:    vmov.32 d18[1], r0
+; LE-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d12[1], r5
+; LE-NEXT:    vmov.32 d17[1], r0
+; LE-NEXT:    add r0, r11, #64
+; LE-NEXT:    vmov.32 d16[1], r1
+; LE-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT:    vmov.32 d20[1], r9
+; LE-NEXT:    vst1.64 {d12, d13}, [r0:128]
+; LE-NEXT:    vst1.64 {d14, d15}, [r11:128]!
+; LE-NEXT:    vst1.64 {d20, d21}, [r11:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; LE-NEXT:    add sp, sp, #120
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    add sp, sp, #4
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v16i64_v16f16:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    .pad #4
+; LE-NEON-NEXT:    sub sp, sp, #4
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    .pad #120
+; LE-NEON-NEXT:    sub sp, sp, #120
+; LE-NEON-NEXT:    mov r11, r0
+; LE-NEON-NEXT:    vmov r0, s7
+; LE-NEON-NEXT:    vstr s15, [sp, #24] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.f32 s23, s13
+; LE-NEON-NEXT:    vstr s14, [sp, #100] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.f32 s25, s12
+; LE-NEON-NEXT:    vmov.f32 s27, s11
+; LE-NEON-NEXT:    vstr s10, [sp, #104] @ 4-byte Spill
+; LE-NEON-NEXT:    vstr s9, [sp, #108] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.f32 s24, s8
+; LE-NEON-NEXT:    vmov.f32 s19, s6
+; LE-NEON-NEXT:    vmov.f32 s29, s5
+; LE-NEON-NEXT:    vmov.f32 s17, s4
+; LE-NEON-NEXT:    vmov.f32 s16, s3
+; LE-NEON-NEXT:    vmov.f32 s21, s2
+; LE-NEON-NEXT:    vmov.f32 s26, s1
+; LE-NEON-NEXT:    vmov.f32 s18, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    mov r7, r0
+; LE-NEON-NEXT:    vmov r0, s25
+; LE-NEON-NEXT:    str r1, [sp, #56] @ 4-byte Spill
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    mov r5, r0
+; LE-NEON-NEXT:    vmov r0, s27
+; LE-NEON-NEXT:    str r1, [sp, #116] @ 4-byte Spill
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    mov r6, r0
+; LE-NEON-NEXT:    vmov r0, s29
+; LE-NEON-NEXT:    str r1, [sp, #112] @ 4-byte Spill
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    vmov r0, s23
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    add lr, sp, #80
+; LE-NEON-NEXT:    vmov.32 d17[0], r6
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    mov r6, r0
+; LE-NEON-NEXT:    vmov r0, s17
+; LE-NEON-NEXT:    vmov r8, s21
+; LE-NEON-NEXT:    str r1, [sp, #76] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov r10, s19
+; LE-NEON-NEXT:    vmov.32 d10[0], r5
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    add lr, sp, #40
+; LE-NEON-NEXT:    vmov.32 d11[0], r6
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    mov r0, r10
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    vmov.32 d11[0], r7
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    mov r0, r8
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    mov r6, r0
+; LE-NEON-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-NEON-NEXT:    vmov r0, s18
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    mov r5, r0
+; LE-NEON-NEXT:    vmov r0, s16
+; LE-NEON-NEXT:    vmov.32 d10[1], r7
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    vmov.32 d15[1], r4
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    vmov r0, s26
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    vmov r8, s24
+; LE-NEON-NEXT:    vmov.32 d14[1], r9
+; LE-NEON-NEXT:    mov r10, r1
+; LE-NEON-NEXT:    vmov s24, r5
+; LE-NEON-NEXT:    vldr s0, [sp, #24] @ 4-byte Reload
+; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT:    vmov r7, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov.f32 s0, s24
+; LE-NEON-NEXT:    vmov s22, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s22
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    vmov s24, r6
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    mov r0, r7
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov.f32 s0, s24
+; LE-NEON-NEXT:    vmov s22, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s22
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d15[1], r6
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    mov r0, r8
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vldr s0, [sp, #100] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r7, r0
+; LE-NEON-NEXT:    vmov.32 d14[1], r5
+; LE-NEON-NEXT:    vmov r0, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vldr s0, [sp, #104] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov s20, r0
+; LE-NEON-NEXT:    vmov.32 d13[1], r6
+; LE-NEON-NEXT:    vmov r4, s0
+; LE-NEON-NEXT:    vldr s0, [sp, #108] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov r0, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov.f32 s0, s20
+; LE-NEON-NEXT:    vmov s16, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s16
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    vmov s18, r7
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    mov r0, r4
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov.f32 s0, s18
+; LE-NEON-NEXT:    vmov s16, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s16
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d11[1], r6
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #80
+; LE-NEON-NEXT:    vmov.32 d10[1], r4
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #40
+; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    vmov.32 d16[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; LE-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    vmov.32 d19[1], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #116] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov.32 d21[1], r10
+; LE-NEON-NEXT:    vmov.32 d18[1], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-NEON-NEXT:    vmov.32 d17[1], r0
+; LE-NEON-NEXT:    add r0, r11, #64
+; LE-NEON-NEXT:    vmov.32 d16[1], r1
+; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEON-NEXT:    vmov.32 d20[1], r9
+; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]
+; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r11:128]!
+; LE-NEON-NEXT:    vst1.64 {d20, d21}, [r11:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; LE-NEON-NEXT:    add sp, sp, #120
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    add sp, sp, #4
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v16i64_v16f16:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    .pad #4
+; BE-NEXT:    sub sp, sp, #4
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    .pad #112
+; BE-NEXT:    sub sp, sp, #112
+; BE-NEXT:    mov r11, r0
+; BE-NEXT:    vmov r0, s14
+; BE-NEXT:    vmov.f32 s17, s15
+; BE-NEXT:    vstr s13, [sp, #52] @ 4-byte Spill
+; BE-NEXT:    vmov.f32 s21, s12
+; BE-NEXT:    vstr s10, [sp, #68] @ 4-byte Spill
+; BE-NEXT:    vmov.f32 s23, s11
+; BE-NEXT:    vstr s7, [sp, #72] @ 4-byte Spill
+; BE-NEXT:    vmov.f32 s19, s9
+; BE-NEXT:    vstr s4, [sp, #28] @ 4-byte Spill
+; BE-NEXT:    vmov.f32 s26, s8
+; BE-NEXT:    vmov.f32 s24, s6
+; BE-NEXT:    vmov.f32 s18, s5
+; BE-NEXT:    vmov.f32 s25, s3
+; BE-NEXT:    vmov.f32 s16, s2
+; BE-NEXT:    vmov.f32 s27, s1
+; BE-NEXT:    vmov.f32 s29, s0
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    mov r8, r0
+; BE-NEXT:    vmov r0, s29
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    mov r9, r0
+; BE-NEXT:    vmov r0, s27
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    mov r7, r0
+; BE-NEXT:    vmov r0, s21
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    mov r6, r0
+; BE-NEXT:    vmov r0, s25
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    mov r5, r0
+; BE-NEXT:    vmov r0, s23
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov s0, r5
+; BE-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; BE-NEXT:    vstr d16, [sp, #96] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov s0, r6
+; BE-NEXT:    str r1, [sp, #92] @ 4-byte Spill
+; BE-NEXT:    vstr d16, [sp, #80] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov s0, r7
+; BE-NEXT:    str r1, [sp, #76] @ 4-byte Spill
+; BE-NEXT:    vstr d16, [sp, #56] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov s0, r9
+; BE-NEXT:    mov r10, r1
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    vmov r0, s17
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    vmov.32 d10[0], r8
+; BE-NEXT:    vmov r6, s19
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    mov r0, r6
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    mov r6, r0
+; BE-NEXT:    vmov r0, s18
+; BE-NEXT:    vmov.32 d10[1], r4
+; BE-NEXT:    vstr d10, [sp, #40] @ 8-byte Spill
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    mov r4, r0
+; BE-NEXT:    vmov r0, s16
+; BE-NEXT:    vmov.32 d11[1], r7
+; BE-NEXT:    vstr d11, [sp, #32] @ 8-byte Spill
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov.32 d15[1], r5
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    vstr d15, [sp, #16] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vldr s0, [sp, #28] @ 4-byte Reload
+; BE-NEXT:    vmov r5, s26
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov s26, r4
+; BE-NEXT:    vmov r0, s0
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    vmov.32 d14[1], r10
+; BE-NEXT:    vmov r4, s24
+; BE-NEXT:    vstr d16, [sp] @ 8-byte Spill
+; BE-NEXT:    vstr d14, [sp, #8] @ 8-byte Spill
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov.f32 s0, s26
+; BE-NEXT:    vmov s22, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s22
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    vmov s24, r6
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    mov r0, r4
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov.f32 s0, s24
+; BE-NEXT:    vmov s22, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s22
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    vmov.32 d14[1], r6
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    mov r0, r5
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vldr s0, [sp, #52] @ 4-byte Reload
+; BE-NEXT:    mov r4, r0
+; BE-NEXT:    vmov.32 d13[1], r7
+; BE-NEXT:    vmov r0, s0
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vldr s0, [sp, #68] @ 4-byte Reload
+; BE-NEXT:    vmov s20, r0
+; BE-NEXT:    vmov.32 d11[1], r6
+; BE-NEXT:    vmov r7, s0
+; BE-NEXT:    vldr s0, [sp, #72] @ 4-byte Reload
+; BE-NEXT:    vmov r0, s0
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov.f32 s0, s20
+; BE-NEXT:    vmov s16, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    vmov s18, r4
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    mov r0, r7
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov.f32 s0, s18
+; BE-NEXT:    vmov s16, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    vmov.32 d15[1], r4
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d24[0], r0
+; BE-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; BE-NEXT:    vldr d23, [sp, #56] @ 8-byte Reload
+; BE-NEXT:    vldr d20, [sp, #8] @ 8-byte Reload
+; BE-NEXT:    vmov.32 d23[1], r0
+; BE-NEXT:    ldr r0, [sp, #92] @ 4-byte Reload
+; BE-NEXT:    vldr d22, [sp, #80] @ 8-byte Reload
+; BE-NEXT:    vldr d26, [sp, #16] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d21, d20
+; BE-NEXT:    vmov.32 d22[1], r0
+; BE-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; BE-NEXT:    vldr d30, [sp] @ 8-byte Reload
+; BE-NEXT:    vldr d25, [sp, #96] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d20, d26
+; BE-NEXT:    vldr d26, [sp, #32] @ 8-byte Reload
+; BE-NEXT:    vmov.32 d10[1], r5
+; BE-NEXT:    vmov.32 d12[1], r9
+; BE-NEXT:    vldr d28, [sp, #40] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d27, d26
+; BE-NEXT:    vmov.32 d25[1], r0
+; BE-NEXT:    add r0, r11, #64
+; BE-NEXT:    vmov.32 d30[1], r8
+; BE-NEXT:    vmov.32 d9[1], r6
+; BE-NEXT:    vrev64.32 d26, d28
+; BE-NEXT:    vrev64.32 d29, d10
+; BE-NEXT:    vmov.32 d24[1], r1
+; BE-NEXT:    vrev64.32 d1, d12
+; BE-NEXT:    vrev64.32 d28, d23
+; BE-NEXT:    vrev64.32 d23, d22
+; BE-NEXT:    vrev64.32 d22, d30
+; BE-NEXT:    vrev64.32 d31, d25
+; BE-NEXT:    vrev64.32 d0, d9
+; BE-NEXT:    vrev64.32 d30, d24
+; BE-NEXT:    vst1.64 {d0, d1}, [r0:128]!
+; BE-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-NEXT:    vst1.64 {d28, d29}, [r0:128]!
+; BE-NEXT:    vrev64.32 d19, d13
+; BE-NEXT:    vst1.64 {d26, d27}, [r0:128]
+; BE-NEXT:    vst1.64 {d20, d21}, [r11:128]!
+; BE-NEXT:    vrev64.32 d18, d14
+; BE-NEXT:    vst1.64 {d22, d23}, [r11:128]!
+; BE-NEXT:    vrev64.32 d17, d15
+; BE-NEXT:    vrev64.32 d16, d11
+; BE-NEXT:    vst1.64 {d18, d19}, [r11:128]!
+; BE-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; BE-NEXT:    add sp, sp, #112
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    add sp, sp, #4
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v16i64_v16f16:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    .pad #4
+; BE-NEON-NEXT:    sub sp, sp, #4
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    .pad #112
+; BE-NEON-NEXT:    sub sp, sp, #112
+; BE-NEON-NEXT:    mov r11, r0
+; BE-NEON-NEXT:    vmov r0, s14
+; BE-NEON-NEXT:    vmov.f32 s17, s15
+; BE-NEON-NEXT:    vstr s13, [sp, #52] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.f32 s21, s12
+; BE-NEON-NEXT:    vstr s10, [sp, #68] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.f32 s23, s11
+; BE-NEON-NEXT:    vstr s7, [sp, #72] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.f32 s19, s9
+; BE-NEON-NEXT:    vstr s4, [sp, #28] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.f32 s26, s8
+; BE-NEON-NEXT:    vmov.f32 s24, s6
+; BE-NEON-NEXT:    vmov.f32 s18, s5
+; BE-NEON-NEXT:    vmov.f32 s25, s3
+; BE-NEON-NEXT:    vmov.f32 s16, s2
+; BE-NEON-NEXT:    vmov.f32 s27, s1
+; BE-NEON-NEXT:    vmov.f32 s29, s0
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    mov r8, r0
+; BE-NEON-NEXT:    vmov r0, s29
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    mov r9, r0
+; BE-NEON-NEXT:    vmov r0, s27
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    mov r7, r0
+; BE-NEON-NEXT:    vmov r0, s21
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    mov r6, r0
+; BE-NEON-NEXT:    vmov r0, s25
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    mov r5, r0
+; BE-NEON-NEXT:    vmov r0, s23
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov s0, r5
+; BE-NEON-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr d16, [sp, #96] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov s0, r6
+; BE-NEON-NEXT:    str r1, [sp, #92] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr d16, [sp, #80] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov s0, r7
+; BE-NEON-NEXT:    str r1, [sp, #76] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr d16, [sp, #56] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov s0, r9
+; BE-NEON-NEXT:    mov r10, r1
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    vmov r0, s17
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    vmov.32 d10[0], r8
+; BE-NEON-NEXT:    vmov r6, s19
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    mov r0, r6
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    mov r6, r0
+; BE-NEON-NEXT:    vmov r0, s18
+; BE-NEON-NEXT:    vmov.32 d10[1], r4
+; BE-NEON-NEXT:    vstr d10, [sp, #40] @ 8-byte Spill
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    mov r4, r0
+; BE-NEON-NEXT:    vmov r0, s16
+; BE-NEON-NEXT:    vmov.32 d11[1], r7
+; BE-NEON-NEXT:    vstr d11, [sp, #32] @ 8-byte Spill
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov.32 d15[1], r5
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    vstr d15, [sp, #16] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vldr s0, [sp, #28] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov r5, s26
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov s26, r4
+; BE-NEON-NEXT:    vmov r0, s0
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    vmov.32 d14[1], r10
+; BE-NEON-NEXT:    vmov r4, s24
+; BE-NEON-NEXT:    vstr d16, [sp] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d14, [sp, #8] @ 8-byte Spill
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov.f32 s0, s26
+; BE-NEON-NEXT:    vmov s22, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s22
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    vmov s24, r6
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    mov r0, r4
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov.f32 s0, s24
+; BE-NEON-NEXT:    vmov s22, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s22
+; BE-NEON-NEXT:    mov r9, r1
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    vmov.32 d14[1], r6
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    mov r0, r5
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vldr s0, [sp, #52] @ 4-byte Reload
+; BE-NEON-NEXT:    mov r4, r0
+; BE-NEON-NEXT:    vmov.32 d13[1], r7
+; BE-NEON-NEXT:    vmov r0, s0
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vldr s0, [sp, #68] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov s20, r0
+; BE-NEON-NEXT:    vmov.32 d11[1], r6
+; BE-NEON-NEXT:    vmov r7, s0
+; BE-NEON-NEXT:    vldr s0, [sp, #72] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov r0, s0
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov.f32 s0, s20
+; BE-NEON-NEXT:    vmov s16, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    vmov s18, r4
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    mov r0, r7
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov.f32 s0, s18
+; BE-NEON-NEXT:    vmov s16, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    vmov.32 d15[1], r4
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d24[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; BE-NEON-NEXT:    vldr d23, [sp, #56] @ 8-byte Reload
+; BE-NEON-NEXT:    vldr d20, [sp, #8] @ 8-byte Reload
+; BE-NEON-NEXT:    vmov.32 d23[1], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #92] @ 4-byte Reload
+; BE-NEON-NEXT:    vldr d22, [sp, #80] @ 8-byte Reload
+; BE-NEON-NEXT:    vldr d26, [sp, #16] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d21, d20
+; BE-NEON-NEXT:    vmov.32 d22[1], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; BE-NEON-NEXT:    vldr d30, [sp] @ 8-byte Reload
+; BE-NEON-NEXT:    vldr d25, [sp, #96] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d20, d26
+; BE-NEON-NEXT:    vldr d26, [sp, #32] @ 8-byte Reload
+; BE-NEON-NEXT:    vmov.32 d10[1], r5
+; BE-NEON-NEXT:    vmov.32 d12[1], r9
+; BE-NEON-NEXT:    vldr d28, [sp, #40] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d27, d26
+; BE-NEON-NEXT:    vmov.32 d25[1], r0
+; BE-NEON-NEXT:    add r0, r11, #64
+; BE-NEON-NEXT:    vmov.32 d30[1], r8
+; BE-NEON-NEXT:    vmov.32 d9[1], r6
+; BE-NEON-NEXT:    vrev64.32 d26, d28
+; BE-NEON-NEXT:    vrev64.32 d29, d10
+; BE-NEON-NEXT:    vmov.32 d24[1], r1
+; BE-NEON-NEXT:    vrev64.32 d1, d12
+; BE-NEON-NEXT:    vrev64.32 d28, d23
+; BE-NEON-NEXT:    vrev64.32 d23, d22
+; BE-NEON-NEXT:    vrev64.32 d22, d30
+; BE-NEON-NEXT:    vrev64.32 d31, d25
+; BE-NEON-NEXT:    vrev64.32 d0, d9
+; BE-NEON-NEXT:    vrev64.32 d30, d24
+; BE-NEON-NEXT:    vst1.64 {d0, d1}, [r0:128]!
+; BE-NEON-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-NEON-NEXT:    vst1.64 {d28, d29}, [r0:128]!
+; BE-NEON-NEXT:    vrev64.32 d19, d13
+; BE-NEON-NEXT:    vst1.64 {d26, d27}, [r0:128]
+; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r11:128]!
+; BE-NEON-NEXT:    vrev64.32 d18, d14
+; BE-NEON-NEXT:    vst1.64 {d22, d23}, [r11:128]!
+; BE-NEON-NEXT:    vrev64.32 d17, d15
+; BE-NEON-NEXT:    vrev64.32 d16, d11
+; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r11:128]!
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; BE-NEON-NEXT:    add sp, sp, #112
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    add sp, sp, #4
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x)
+  ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>)
+
+define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
+; LE-LABEL: llrint_v32i64_v32f16:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    .pad #4
+; LE-NEXT:    sub sp, sp, #4
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    .pad #248
+; LE-NEXT:    sub sp, sp, #248
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    mov r11, r0
+; LE-NEXT:    vstr s15, [sp, #176] @ 4-byte Spill
+; LE-NEXT:    vmov.f32 s19, s14
+; LE-NEXT:    ldrh r0, [lr, #132]
+; LE-NEXT:    vmov.f32 s17, s11
+; LE-NEXT:    vstr s13, [sp, #196] @ 4-byte Spill
+; LE-NEXT:    vstr s12, [sp, #112] @ 4-byte Spill
+; LE-NEXT:    vstr s10, [sp, #136] @ 4-byte Spill
+; LE-NEXT:    vstr s9, [sp, #160] @ 4-byte Spill
+; LE-NEXT:    vstr s8, [sp, #200] @ 4-byte Spill
+; LE-NEXT:    vstr s7, [sp, #100] @ 4-byte Spill
+; LE-NEXT:    vstr s6, [sp, #116] @ 4-byte Spill
+; LE-NEXT:    vstr s5, [sp, #76] @ 4-byte Spill
+; LE-NEXT:    vstr s4, [sp, #120] @ 4-byte Spill
+; LE-NEXT:    vstr s3, [sp, #156] @ 4-byte Spill
+; LE-NEXT:    vstr s2, [sp, #192] @ 4-byte Spill
+; LE-NEXT:    vstr s1, [sp, #104] @ 4-byte Spill
+; LE-NEXT:    vstr s0, [sp, #108] @ 4-byte Spill
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; LE-NEXT:    str r1, [sp, #56] @ 4-byte Spill
+; LE-NEXT:    ldrh r0, [lr, #108]
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; LE-NEXT:    ldrh r0, [lr, #96]
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    mov r5, r0
+; LE-NEXT:    ldrh r0, [lr, #100]
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    mov r7, r0
+; LE-NEXT:    ldrh r0, [lr, #156]
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    mov r6, r0
+; LE-NEXT:    ldrh r0, [lr, #152]
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    mov r4, r0
+; LE-NEXT:    ldrh r0, [lr, #148]
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r4
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r6
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r7
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r5
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    ldrh r0, [lr, #144]
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    mov r10, r0
+; LE-NEXT:    vmov.32 d11[1], r7
+; LE-NEXT:    ldrh r0, [lr, #104]
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov.32 d10[1], r5
+; LE-NEXT:    add lr, sp, #80
+; LE-NEXT:    mov r7, r0
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    ldrh r0, [lr, #124]
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    mov r5, r0
+; LE-NEXT:    vmov.32 d15[1], r6
+; LE-NEXT:    ldrh r0, [lr, #120]
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov.32 d14[1], r4
+; LE-NEXT:    add lr, sp, #16
+; LE-NEXT:    mov r6, r0
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    ldrh r0, [lr, #116]
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    vorr q5, q6, q6
+; LE-NEXT:    mov r4, r0
+; LE-NEXT:    ldrh r0, [lr, #112]
+; LE-NEXT:    vmov.32 d11[1], r8
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r4
+; LE-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r6
+; LE-NEXT:    add lr, sp, #216
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r5
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r7
+; LE-NEXT:    add lr, sp, #232
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r10
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    ldrh r0, [lr, #140]
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d10[1], r5
+; LE-NEXT:    add lr, sp, #32
+; LE-NEXT:    vmov s16, r0
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    ldrh r1, [lr, #128]
+; LE-NEXT:    mov r0, r1
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov.f32 s0, s16
+; LE-NEXT:    vmov s18, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
+; LE-NEXT:    ldrh r0, [lr, #136]
+; LE-NEXT:    vmov.32 d15[1], r6
+; LE-NEXT:    vmov.32 d11[0], r1
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov.f32 s0, s18
+; LE-NEXT:    vmov s16, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s16
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d11[1], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d13[1], r5
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d12[1], r9
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    vmov r0, s19
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #232
+; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d13[1], r8
+; LE-NEXT:    vmov.32 d12[1], r4
+; LE-NEXT:    vmov.32 d10[1], r6
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #216
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d17[1], r2
+; LE-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d14[1], r1
+; LE-NEXT:    add r1, r11, #192
+; LE-NEXT:    vmov.32 d16[1], r2
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #32
+; LE-NEXT:    vst1.64 {d10, d11}, [r1:128]!
+; LE-NEXT:    vst1.64 {d14, d15}, [r1:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #16
+; LE-NEXT:    vst1.64 {d16, d17}, [r1:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vst1.64 {d16, d17}, [r1:128]
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; LE-NEXT:    vmov r0, s17
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vldr s0, [sp, #76] @ 4-byte Reload
+; LE-NEXT:    mov r10, r0
+; LE-NEXT:    vmov r0, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vldr s0, [sp, #100] @ 4-byte Reload
+; LE-NEXT:    mov r4, r0
+; LE-NEXT:    vmov r0, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vldr s0, [sp, #104] @ 4-byte Reload
+; LE-NEXT:    mov r7, r0
+; LE-NEXT:    vmov r0, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vldr s0, [sp, #108] @ 4-byte Reload
+; LE-NEXT:    mov r5, r0
+; LE-NEXT:    vmov r0, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vldr s0, [sp, #112] @ 4-byte Reload
+; LE-NEXT:    mov r6, r0
+; LE-NEXT:    vmov r0, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r6
+; LE-NEXT:    str r1, [sp, #112] @ 4-byte Spill
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r5
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r7
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r4
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov s0, r10
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vldr s0, [sp, #116] @ 4-byte Reload
+; LE-NEXT:    mov r6, r0
+; LE-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; LE-NEXT:    vmov.32 d11[1], r5
+; LE-NEXT:    vmov r0, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    vmov.32 d13[1], r4
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vldr s0, [sp, #120] @ 4-byte Reload
+; LE-NEXT:    mov r4, r0
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d9[1], r8
+; LE-NEXT:    vmov r0, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vldr s0, [sp, #136] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d10[0], r4
+; LE-NEXT:    vmov r7, s0
+; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    add lr, sp, #136
+; LE-NEXT:    add r10, r11, #128
+; LE-NEXT:    mov r0, r7
+; LE-NEXT:    vmov.32 d10[1], r5
+; LE-NEXT:    vmov.32 d12[1], r1
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #120
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #80
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    vmov.32 d13[0], r6
+; LE-NEXT:    vst1.64 {d16, d17}, [r10:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vst1.64 {d16, d17}, [r10:128]!
+; LE-NEXT:    vldr s0, [sp, #156] @ 4-byte Reload
+; LE-NEXT:    vmov r4, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vldr s0, [sp, #160] @ 4-byte Reload
+; LE-NEXT:    mov r5, r0
+; LE-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d8[1], r9
+; LE-NEXT:    vmov r7, s0
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    vldr s0, [sp, #176] @ 4-byte Reload
+; LE-NEXT:    vmov s20, r0
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    vmov r0, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov.f32 s0, s20
+; LE-NEXT:    vmov s18, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s18
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    vmov s16, r5
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    mov r0, r7
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov.f32 s0, s16
+; LE-NEXT:    vmov s18, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s18
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d11[1], r5
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    mov r0, r4
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vldr s0, [sp, #196] @ 4-byte Reload
+; LE-NEXT:    mov r7, r0
+; LE-NEXT:    vmov.32 d10[1], r6
+; LE-NEXT:    vmov r0, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vldr s0, [sp, #192] @ 4-byte Reload
+; LE-NEXT:    vmov s16, r0
+; LE-NEXT:    vmov.32 d13[1], r5
+; LE-NEXT:    vmov r6, s0
+; LE-NEXT:    vldr s0, [sp, #200] @ 4-byte Reload
+; LE-NEXT:    vmov r0, s0
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov.f32 s0, s16
+; LE-NEXT:    vmov s18, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s18
+; LE-NEXT:    add lr, sp, #200
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov s16, r7
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    mov r0, r6
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    bl __aeabi_h2f
+; LE-NEXT:    vmov.f32 s0, s16
+; LE-NEXT:    vmov s18, r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s18
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    vmov.32 d12[1], r5
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #200
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
+; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    vmov.32 d19[1], r4
+; LE-NEXT:    vmov.32 d18[1], r0
+; LE-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #216
+; LE-NEXT:    vmov.32 d17[1], r0
+; LE-NEXT:    add r0, r11, #64
+; LE-NEXT:    vmov.32 d16[1], r8
+; LE-NEXT:    vorr q10, q8, q8
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #232
+; LE-NEXT:    vmov.32 d15[1], r6
+; LE-NEXT:    vst1.64 {d16, d17}, [r10:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    vmov.32 d14[1], r1
+; LE-NEXT:    vst1.64 {d16, d17}, [r10:128]
+; LE-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT:    vst1.64 {d10, d11}, [r0:128]
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #120
+; LE-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-NEXT:    vst1.64 {d14, d15}, [r11:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #136
+; LE-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; LE-NEXT:    add sp, sp, #248
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    add sp, sp, #4
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v32i64_v32f16:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    .pad #4
+; LE-NEON-NEXT:    sub sp, sp, #4
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    .pad #248
+; LE-NEON-NEXT:    sub sp, sp, #248
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    mov r11, r0
+; LE-NEON-NEXT:    vstr s15, [sp, #176] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.f32 s19, s14
+; LE-NEON-NEXT:    ldrh r0, [lr, #132]
+; LE-NEON-NEXT:    vmov.f32 s17, s11
+; LE-NEON-NEXT:    vstr s13, [sp, #196] @ 4-byte Spill
+; LE-NEON-NEXT:    vstr s12, [sp, #112] @ 4-byte Spill
+; LE-NEON-NEXT:    vstr s10, [sp, #136] @ 4-byte Spill
+; LE-NEON-NEXT:    vstr s9, [sp, #160] @ 4-byte Spill
+; LE-NEON-NEXT:    vstr s8, [sp, #200] @ 4-byte Spill
+; LE-NEON-NEXT:    vstr s7, [sp, #100] @ 4-byte Spill
+; LE-NEON-NEXT:    vstr s6, [sp, #116] @ 4-byte Spill
+; LE-NEON-NEXT:    vstr s5, [sp, #76] @ 4-byte Spill
+; LE-NEON-NEXT:    vstr s4, [sp, #120] @ 4-byte Spill
+; LE-NEON-NEXT:    vstr s3, [sp, #156] @ 4-byte Spill
+; LE-NEON-NEXT:    vstr s2, [sp, #192] @ 4-byte Spill
+; LE-NEON-NEXT:    vstr s1, [sp, #104] @ 4-byte Spill
+; LE-NEON-NEXT:    vstr s0, [sp, #108] @ 4-byte Spill
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; LE-NEON-NEXT:    str r1, [sp, #56] @ 4-byte Spill
+; LE-NEON-NEXT:    ldrh r0, [lr, #108]
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    str r0, [sp, #32] @ 4-byte Spill
+; LE-NEON-NEXT:    ldrh r0, [lr, #96]
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    mov r5, r0
+; LE-NEON-NEXT:    ldrh r0, [lr, #100]
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    mov r7, r0
+; LE-NEON-NEXT:    ldrh r0, [lr, #156]
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    mov r6, r0
+; LE-NEON-NEXT:    ldrh r0, [lr, #152]
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    mov r4, r0
+; LE-NEON-NEXT:    ldrh r0, [lr, #148]
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r4
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r6
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r7
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r5
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    ldrh r0, [lr, #144]
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    mov r10, r0
+; LE-NEON-NEXT:    vmov.32 d11[1], r7
+; LE-NEON-NEXT:    ldrh r0, [lr, #104]
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov.32 d10[1], r5
+; LE-NEON-NEXT:    add lr, sp, #80
+; LE-NEON-NEXT:    mov r7, r0
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    ldrh r0, [lr, #124]
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    mov r5, r0
+; LE-NEON-NEXT:    vmov.32 d15[1], r6
+; LE-NEON-NEXT:    ldrh r0, [lr, #120]
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov.32 d14[1], r4
+; LE-NEON-NEXT:    add lr, sp, #16
+; LE-NEON-NEXT:    mov r6, r0
+; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    ldrh r0, [lr, #116]
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    vorr q5, q6, q6
+; LE-NEON-NEXT:    mov r4, r0
+; LE-NEON-NEXT:    ldrh r0, [lr, #112]
+; LE-NEON-NEXT:    vmov.32 d11[1], r8
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r4
+; LE-NEON-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r6
+; LE-NEON-NEXT:    add lr, sp, #216
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r5
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r7
+; LE-NEON-NEXT:    add lr, sp, #232
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r10
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    ldrh r0, [lr, #140]
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov.32 d10[1], r5
+; LE-NEON-NEXT:    add lr, sp, #32
+; LE-NEON-NEXT:    vmov s16, r0
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    ldrh r1, [lr, #128]
+; LE-NEON-NEXT:    mov r0, r1
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov.f32 s0, s16
+; LE-NEON-NEXT:    vmov s18, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #256
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
+; LE-NEON-NEXT:    ldrh r0, [lr, #136]
+; LE-NEON-NEXT:    vmov.32 d15[1], r6
+; LE-NEON-NEXT:    vmov.32 d11[0], r1
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov.f32 s0, s18
+; LE-NEON-NEXT:    vmov s16, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s16
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d13[1], r5
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov.32 d12[1], r9
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    vmov r0, s19
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #232
+; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d13[1], r8
+; LE-NEON-NEXT:    vmov.32 d12[1], r4
+; LE-NEON-NEXT:    vmov.32 d10[1], r6
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #216
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d17[1], r2
+; LE-NEON-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov.32 d14[1], r1
+; LE-NEON-NEXT:    add r1, r11, #192
+; LE-NEON-NEXT:    vmov.32 d16[1], r2
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #32
+; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r1:128]!
+; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r1:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #16
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r1:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r1:128]
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    str r0, [sp, #52] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov r0, s17
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vldr s0, [sp, #76] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r10, r0
+; LE-NEON-NEXT:    vmov r0, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vldr s0, [sp, #100] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r4, r0
+; LE-NEON-NEXT:    vmov r0, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vldr s0, [sp, #104] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r7, r0
+; LE-NEON-NEXT:    vmov r0, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vldr s0, [sp, #108] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r5, r0
+; LE-NEON-NEXT:    vmov r0, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vldr s0, [sp, #112] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r6, r0
+; LE-NEON-NEXT:    vmov r0, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r6
+; LE-NEON-NEXT:    str r1, [sp, #112] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r5
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r7
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r4
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov s0, r10
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vldr s0, [sp, #116] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r6, r0
+; LE-NEON-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.32 d11[1], r5
+; LE-NEON-NEXT:    vmov r0, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    vmov.32 d13[1], r4
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vldr s0, [sp, #120] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r4, r0
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d9[1], r8
+; LE-NEON-NEXT:    vmov r0, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vldr s0, [sp, #136] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov.32 d10[0], r4
+; LE-NEON-NEXT:    vmov r7, s0
+; LE-NEON-NEXT:    vmov s0, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    add lr, sp, #136
+; LE-NEON-NEXT:    add r10, r11, #128
+; LE-NEON-NEXT:    mov r0, r7
+; LE-NEON-NEXT:    vmov.32 d10[1], r5
+; LE-NEON-NEXT:    vmov.32 d12[1], r1
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #120
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #80
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    vmov.32 d13[0], r6
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]!
+; LE-NEON-NEXT:    vldr s0, [sp, #156] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov r4, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vldr s0, [sp, #160] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r5, r0
+; LE-NEON-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov.32 d8[1], r9
+; LE-NEON-NEXT:    vmov r7, s0
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    vldr s0, [sp, #176] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov s20, r0
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    vmov r0, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov.f32 s0, s20
+; LE-NEON-NEXT:    vmov s18, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s18
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    vmov s16, r5
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    mov r0, r7
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov.f32 s0, s16
+; LE-NEON-NEXT:    vmov s18, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s18
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d11[1], r5
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    mov r0, r4
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vldr s0, [sp, #196] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r7, r0
+; LE-NEON-NEXT:    vmov.32 d10[1], r6
+; LE-NEON-NEXT:    vmov r0, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vldr s0, [sp, #192] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov s16, r0
+; LE-NEON-NEXT:    vmov.32 d13[1], r5
+; LE-NEON-NEXT:    vmov r6, s0
+; LE-NEON-NEXT:    vldr s0, [sp, #200] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov r0, s0
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov.f32 s0, s16
+; LE-NEON-NEXT:    vmov s18, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s18
+; LE-NEON-NEXT:    add lr, sp, #200
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov s16, r7
+; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    mov r0, r6
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    bl __aeabi_h2f
+; LE-NEON-NEXT:    vmov.f32 s0, s16
+; LE-NEON-NEXT:    vmov s18, r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s18
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #200
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
+; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    vmov.32 d19[1], r4
+; LE-NEON-NEXT:    vmov.32 d18[1], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #216
+; LE-NEON-NEXT:    vmov.32 d17[1], r0
+; LE-NEON-NEXT:    add r0, r11, #64
+; LE-NEON-NEXT:    vmov.32 d16[1], r8
+; LE-NEON-NEXT:    vorr q10, q8, q8
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #232
+; LE-NEON-NEXT:    vmov.32 d15[1], r6
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    vmov.32 d14[1], r1
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]
+; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #120
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r11:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #136
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; LE-NEON-NEXT:    add sp, sp, #248
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    add sp, sp, #4
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v32i64_v32f16:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    .pad #4
+; BE-NEXT:    sub sp, sp, #4
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    .pad #176
+; BE-NEXT:    sub sp, sp, #176
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    mov r10, r0
+; BE-NEXT:    vstr s15, [sp, #112] @ 4-byte Spill
+; BE-NEXT:    ldrh r0, [lr, #74]
+; BE-NEXT:    vstr s14, [sp, #80] @ 4-byte Spill
+; BE-NEXT:    vstr s13, [sp, #48] @ 4-byte Spill
+; BE-NEXT:    vstr s12, [sp, #148] @ 4-byte Spill
+; BE-NEXT:    vstr s11, [sp, #76] @ 4-byte Spill
+; BE-NEXT:    vstr s10, [sp, #152] @ 4-byte Spill
+; BE-NEXT:    vstr s9, [sp, #156] @ 4-byte Spill
+; BE-NEXT:    vstr s8, [sp, #120] @ 4-byte Spill
+; BE-NEXT:    vstr s7, [sp, #136] @ 4-byte Spill
+; BE-NEXT:    vstr s6, [sp, #132] @ 4-byte Spill
+; BE-NEXT:    vstr s5, [sp, #144] @ 4-byte Spill
+; BE-NEXT:    vstr s4, [sp, #64] @ 4-byte Spill
+; BE-NEXT:    vstr s3, [sp, #104] @ 4-byte Spill
+; BE-NEXT:    vstr s2, [sp, #88] @ 4-byte Spill
+; BE-NEXT:    vstr s1, [sp, #56] @ 4-byte Spill
+; BE-NEXT:    vstr s0, [sp, #96] @ 4-byte Spill
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    mov r9, r0
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    ldrh r0, [lr, #62]
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    mov r6, r0
+; BE-NEXT:    ldrh r0, [lr, #58]
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    mov r7, r0
+; BE-NEXT:    ldrh r0, [lr, #66]
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    mov r4, r0
+; BE-NEXT:    ldrh r0, [lr, #54]
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    mov r5, r0
+; BE-NEXT:    ldrh r0, [lr, #50]
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov s0, r5
+; BE-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; BE-NEXT:    vstr d16, [sp, #168] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov s0, r4
+; BE-NEXT:    str r1, [sp, #40] @ 4-byte Spill
+; BE-NEXT:    vstr d16, [sp, #160] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov s0, r7
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vstr d16, [sp, #32] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov s0, r6
+; BE-NEXT:    mov r11, r1
+; BE-NEXT:    vstr d16, [sp, #24] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    ldrh r0, [lr, #34]
+; BE-NEXT:    vstr d16, [sp, #16] @ 8-byte Spill
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    vmov.32 d8[0], r9
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    ldrh r1, [lr, #38]
+; BE-NEXT:    mov r0, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov.32 d8[1], r8
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    ldrh r1, [lr, #26]
+; BE-NEXT:    mov r0, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    vmov.32 d12[1], r7
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    ldrh r1, [lr, #30]
+; BE-NEXT:    mov r0, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    vmov.32 d13[1], r5
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    ldrh r1, [lr, #78]
+; BE-NEXT:    mov r0, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    vmov.32 d9[1], r7
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    ldrh r1, [lr, #82]
+; BE-NEXT:    mov r0, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    vmov.32 d15[1], r5
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    ldrh r1, [lr, #86]
+; BE-NEXT:    mov r0, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    vmov.32 d14[1], r7
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    ldrh r1, [lr, #70]
+; BE-NEXT:    mov r0, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    vmov.32 d8[1], r5
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    ldrh r1, [lr, #46]
+; BE-NEXT:    mov r0, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    vmov.32 d10[1], r7
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d25[0], r0
+; BE-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; BE-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
+; BE-NEXT:    vldr d24, [sp, #160] @ 8-byte Reload
+; BE-NEXT:    vldr s0, [sp, #48] @ 4-byte Reload
+; BE-NEXT:    vmov.32 d24[1], r0
+; BE-NEXT:    vmov r0, s0
+; BE-NEXT:    vldr d26, [sp, #16] @ 8-byte Reload
+; BE-NEXT:    vstr d24, [sp, #160] @ 8-byte Spill
+; BE-NEXT:    vldr d24, [sp, #8] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d23, d14
+; BE-NEXT:    vldr d29, [sp, #24] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d22, d24
+; BE-NEXT:    vldr d24, [sp, #168] @ 8-byte Reload
+; BE-NEXT:    vmov.32 d26[1], r6
+; BE-NEXT:    vldr d28, [sp, #32] @ 8-byte Reload
+; BE-NEXT:    vmov.32 d25[1], r1
+; BE-NEXT:    add r1, r10, #192
+; BE-NEXT:    vmov.32 d29[1], r11
+; BE-NEXT:    add r11, r10, #128
+; BE-NEXT:    vmov.32 d24[1], r2
+; BE-NEXT:    vmov.32 d11[1], r5
+; BE-NEXT:    vmov.32 d28[1], r4
+; BE-NEXT:    vrev64.32 d27, d26
+; BE-NEXT:    vstr d24, [sp, #168] @ 8-byte Spill
+; BE-NEXT:    vstr d25, [sp, #48] @ 8-byte Spill
+; BE-NEXT:    vrev64.32 d25, d11
+; BE-NEXT:    vrev64.32 d26, d29
+; BE-NEXT:    vrev64.32 d24, d28
+; BE-NEXT:    vst1.64 {d26, d27}, [r1:128]!
+; BE-NEXT:    vst1.64 {d24, d25}, [r1:128]!
+; BE-NEXT:    vrev64.32 d21, d10
+; BE-NEXT:    vrev64.32 d19, d15
+; BE-NEXT:    vrev64.32 d17, d13
+; BE-NEXT:    vrev64.32 d20, d8
+; BE-NEXT:    vst1.64 {d22, d23}, [r1:128]!
+; BE-NEXT:    vrev64.32 d18, d9
+; BE-NEXT:    vrev64.32 d16, d12
+; BE-NEXT:    vst1.64 {d20, d21}, [r1:128]
+; BE-NEXT:    vst1.64 {d18, d19}, [r11:128]!
+; BE-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #256
+; BE-NEXT:    mov r7, r0
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    ldrh r0, [lr, #42]
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vldr s0, [sp, #56] @ 4-byte Reload
+; BE-NEXT:    mov r4, r0
+; BE-NEXT:    vmov r0, s0
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov s0, r4
+; BE-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vldr s0, [sp, #64] @ 4-byte Reload
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov r2, s0
+; BE-NEXT:    vldr s0, [sp, #80] @ 4-byte Reload
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vmov r4, s0
+; BE-NEXT:    vldr s0, [sp, #76] @ 4-byte Reload
+; BE-NEXT:    vstr d16, [sp, #80] @ 8-byte Spill
+; BE-NEXT:    vmov r5, s0
+; BE-NEXT:    mov r0, r2
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    mov r0, r4
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov s0, r0
+; BE-NEXT:    vmov.32 d8[0], r7
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    mov r0, r5
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vldr s0, [sp, #88] @ 4-byte Reload
+; BE-NEXT:    mov r4, r0
+; BE-NEXT:    vmov.32 d8[1], r8
+; BE-NEXT:    vmov r7, s0
+; BE-NEXT:    vldr s0, [sp, #96] @ 4-byte Reload
+; BE-NEXT:    vstr d8, [sp, #88] @ 8-byte Spill
+; BE-NEXT:    vmov r0, s0
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vldr s0, [sp, #104] @ 4-byte Reload
+; BE-NEXT:    vmov s19, r0
+; BE-NEXT:    vmov.32 d12[1], r6
+; BE-NEXT:    vmov r5, s0
+; BE-NEXT:    vldr s0, [sp, #112] @ 4-byte Reload
+; BE-NEXT:    vstr d12, [sp, #104] @ 8-byte Spill
+; BE-NEXT:    vmov r0, s0
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov.f32 s0, s19
+; BE-NEXT:    vmov s30, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s30
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    vmov s17, r4
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    mov r0, r5
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    vmov s30, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s30
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    str r1, [sp, #76] @ 4-byte Spill
+; BE-NEXT:    vmov.32 d12[1], r4
+; BE-NEXT:    vstr d16, [sp, #64] @ 8-byte Spill
+; BE-NEXT:    vstr d12, [sp, #112] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    mov r0, r7
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    vmov.32 d9[1], r6
+; BE-NEXT:    vstr d16, [sp, #56] @ 8-byte Spill
+; BE-NEXT:    vstr d9, [sp, #96] @ 8-byte Spill
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vldr s0, [sp, #120] @ 4-byte Reload
+; BE-NEXT:    mov r5, r0
+; BE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; BE-NEXT:    vmov r7, s0
+; BE-NEXT:    vldr s0, [sp, #132] @ 4-byte Reload
+; BE-NEXT:    vmov.32 d10[1], r0
+; BE-NEXT:    vmov r0, s0
+; BE-NEXT:    vstr d10, [sp, #120] @ 8-byte Spill
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vldr s0, [sp, #136] @ 4-byte Reload
+; BE-NEXT:    vmov s26, r0
+; BE-NEXT:    vmov.32 d11[1], r9
+; BE-NEXT:    vmov r4, s0
+; BE-NEXT:    vldr s0, [sp, #144] @ 4-byte Reload
+; BE-NEXT:    vstr d11, [sp, #136] @ 8-byte Spill
+; BE-NEXT:    vmov r0, s0
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov.f32 s0, s26
+; BE-NEXT:    vmov s22, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s22
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    vmov s24, r5
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    mov r0, r4
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov.f32 s0, s24
+; BE-NEXT:    vmov s22, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s22
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    vmov.32 d14[1], r5
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    mov r0, r7
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vldr s0, [sp, #148] @ 4-byte Reload
+; BE-NEXT:    mov r7, r0
+; BE-NEXT:    vmov.32 d13[1], r6
+; BE-NEXT:    vmov r0, s0
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vldr s0, [sp, #152] @ 4-byte Reload
+; BE-NEXT:    vmov s20, r0
+; BE-NEXT:    vmov.32 d11[1], r5
+; BE-NEXT:    vmov r4, s0
+; BE-NEXT:    vldr s0, [sp, #156] @ 4-byte Reload
+; BE-NEXT:    vmov r0, s0
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov.f32 s0, s20
+; BE-NEXT:    vmov s16, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    vmov s18, r7
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    mov r0, r4
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    bl __aeabi_h2f
+; BE-NEXT:    vmov.f32 s0, s18
+; BE-NEXT:    vmov s16, r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    vmov.32 d15[1], r5
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vldr d16, [sp, #160] @ 8-byte Reload
+; BE-NEXT:    vldr d20, [sp, #136] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d19, d14
+; BE-NEXT:    vrev64.32 d31, d16
+; BE-NEXT:    vldr d16, [sp, #168] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d18, d20
+; BE-NEXT:    vldr d20, [sp, #120] @ 8-byte Reload
+; BE-NEXT:    vldr d22, [sp, #96] @ 8-byte Reload
+; BE-NEXT:    vmov.32 d28[0], r0
+; BE-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; BE-NEXT:    vrev64.32 d21, d20
+; BE-NEXT:    vrev64.32 d30, d16
+; BE-NEXT:    vldr d16, [sp, #48] @ 8-byte Reload
+; BE-NEXT:    vldr d23, [sp, #64] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d20, d22
+; BE-NEXT:    vldr d22, [sp, #112] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d1, d16
+; BE-NEXT:    vldr d16, [sp, #80] @ 8-byte Reload
+; BE-NEXT:    vmov.32 d23[1], r0
+; BE-NEXT:    add r0, r10, #64
+; BE-NEXT:    vrev64.32 d25, d22
+; BE-NEXT:    vldr d22, [sp, #104] @ 8-byte Reload
+; BE-NEXT:    vmov.32 d9[1], r4
+; BE-NEXT:    vrev64.32 d0, d16
+; BE-NEXT:    vmov.32 d28[1], r1
+; BE-NEXT:    vldr d29, [sp, #56] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d3, d15
+; BE-NEXT:    vrev64.32 d24, d22
+; BE-NEXT:    vldr d22, [sp, #88] @ 8-byte Reload
+; BE-NEXT:    vmov.32 d10[1], r6
+; BE-NEXT:    vrev64.32 d5, d23
+; BE-NEXT:    vst1.64 {d0, d1}, [r11:128]!
+; BE-NEXT:    vrev64.32 d2, d9
+; BE-NEXT:    vrev64.32 d27, d22
+; BE-NEXT:    vmov.32 d29[1], r8
+; BE-NEXT:    vrev64.32 d4, d28
+; BE-NEXT:    vst1.64 {d30, d31}, [r11:128]
+; BE-NEXT:    vst1.64 {d2, d3}, [r0:128]!
+; BE-NEXT:    vmov.32 d12[1], r9
+; BE-NEXT:    vrev64.32 d26, d10
+; BE-NEXT:    vst1.64 {d4, d5}, [r0:128]!
+; BE-NEXT:    vrev64.32 d23, d29
+; BE-NEXT:    vst1.64 {d26, d27}, [r0:128]!
+; BE-NEXT:    vrev64.32 d22, d12
+; BE-NEXT:    vst1.64 {d24, d25}, [r0:128]
+; BE-NEXT:    vst1.64 {d20, d21}, [r10:128]!
+; BE-NEXT:    vst1.64 {d22, d23}, [r10:128]!
+; BE-NEXT:    vrev64.32 d17, d11
+; BE-NEXT:    vrev64.32 d16, d13
+; BE-NEXT:    vst1.64 {d18, d19}, [r10:128]!
+; BE-NEXT:    vst1.64 {d16, d17}, [r10:128]
+; BE-NEXT:    add sp, sp, #176
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    add sp, sp, #4
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v32i64_v32f16:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    .pad #4
+; BE-NEON-NEXT:    sub sp, sp, #4
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    .pad #176
+; BE-NEON-NEXT:    sub sp, sp, #176
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    mov r10, r0
+; BE-NEON-NEXT:    vstr s15, [sp, #112] @ 4-byte Spill
+; BE-NEON-NEXT:    ldrh r0, [lr, #74]
+; BE-NEON-NEXT:    vstr s14, [sp, #80] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr s13, [sp, #48] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr s12, [sp, #148] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr s11, [sp, #76] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr s10, [sp, #152] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr s9, [sp, #156] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr s8, [sp, #120] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr s7, [sp, #136] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr s6, [sp, #132] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr s5, [sp, #144] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr s4, [sp, #64] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr s3, [sp, #104] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr s2, [sp, #88] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr s1, [sp, #56] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr s0, [sp, #96] @ 4-byte Spill
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    mov r9, r0
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    ldrh r0, [lr, #62]
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    mov r6, r0
+; BE-NEON-NEXT:    ldrh r0, [lr, #58]
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    mov r7, r0
+; BE-NEON-NEXT:    ldrh r0, [lr, #66]
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    mov r4, r0
+; BE-NEON-NEXT:    ldrh r0, [lr, #54]
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    mov r5, r0
+; BE-NEON-NEXT:    ldrh r0, [lr, #50]
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov s0, r5
+; BE-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr d16, [sp, #168] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov s0, r4
+; BE-NEON-NEXT:    str r1, [sp, #40] @ 4-byte Spill
+; BE-NEON-NEXT:    vstr d16, [sp, #160] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov s0, r7
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vstr d16, [sp, #32] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov s0, r6
+; BE-NEON-NEXT:    mov r11, r1
+; BE-NEON-NEXT:    vstr d16, [sp, #24] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    ldrh r0, [lr, #34]
+; BE-NEON-NEXT:    vstr d16, [sp, #16] @ 8-byte Spill
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    vmov.32 d8[0], r9
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    ldrh r1, [lr, #38]
+; BE-NEON-NEXT:    mov r0, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov.32 d8[1], r8
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    ldrh r1, [lr, #26]
+; BE-NEON-NEXT:    mov r0, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    vmov.32 d12[1], r7
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    ldrh r1, [lr, #30]
+; BE-NEON-NEXT:    mov r0, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    vmov.32 d13[1], r5
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    ldrh r1, [lr, #78]
+; BE-NEON-NEXT:    mov r0, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    vmov.32 d9[1], r7
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    ldrh r1, [lr, #82]
+; BE-NEON-NEXT:    mov r0, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    vmov.32 d15[1], r5
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    ldrh r1, [lr, #86]
+; BE-NEON-NEXT:    mov r0, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    vmov.32 d14[1], r7
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    ldrh r1, [lr, #70]
+; BE-NEON-NEXT:    mov r0, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    vmov.32 d8[1], r5
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    ldrh r1, [lr, #46]
+; BE-NEON-NEXT:    mov r0, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    vmov.32 d10[1], r7
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d25[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; BE-NEON-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
+; BE-NEON-NEXT:    vldr d24, [sp, #160] @ 8-byte Reload
+; BE-NEON-NEXT:    vldr s0, [sp, #48] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov.32 d24[1], r0
+; BE-NEON-NEXT:    vmov r0, s0
+; BE-NEON-NEXT:    vldr d26, [sp, #16] @ 8-byte Reload
+; BE-NEON-NEXT:    vstr d24, [sp, #160] @ 8-byte Spill
+; BE-NEON-NEXT:    vldr d24, [sp, #8] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d23, d14
+; BE-NEON-NEXT:    vldr d29, [sp, #24] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d22, d24
+; BE-NEON-NEXT:    vldr d24, [sp, #168] @ 8-byte Reload
+; BE-NEON-NEXT:    vmov.32 d26[1], r6
+; BE-NEON-NEXT:    vldr d28, [sp, #32] @ 8-byte Reload
+; BE-NEON-NEXT:    vmov.32 d25[1], r1
+; BE-NEON-NEXT:    add r1, r10, #192
+; BE-NEON-NEXT:    vmov.32 d29[1], r11
+; BE-NEON-NEXT:    add r11, r10, #128
+; BE-NEON-NEXT:    vmov.32 d24[1], r2
+; BE-NEON-NEXT:    vmov.32 d11[1], r5
+; BE-NEON-NEXT:    vmov.32 d28[1], r4
+; BE-NEON-NEXT:    vrev64.32 d27, d26
+; BE-NEON-NEXT:    vstr d24, [sp, #168] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d25, [sp, #48] @ 8-byte Spill
+; BE-NEON-NEXT:    vrev64.32 d25, d11
+; BE-NEON-NEXT:    vrev64.32 d26, d29
+; BE-NEON-NEXT:    vrev64.32 d24, d28
+; BE-NEON-NEXT:    vst1.64 {d26, d27}, [r1:128]!
+; BE-NEON-NEXT:    vst1.64 {d24, d25}, [r1:128]!
+; BE-NEON-NEXT:    vrev64.32 d21, d10
+; BE-NEON-NEXT:    vrev64.32 d19, d15
+; BE-NEON-NEXT:    vrev64.32 d17, d13
+; BE-NEON-NEXT:    vrev64.32 d20, d8
+; BE-NEON-NEXT:    vst1.64 {d22, d23}, [r1:128]!
+; BE-NEON-NEXT:    vrev64.32 d18, d9
+; BE-NEON-NEXT:    vrev64.32 d16, d12
+; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r1:128]
+; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r11:128]!
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #256
+; BE-NEON-NEXT:    mov r7, r0
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    ldrh r0, [lr, #42]
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vldr s0, [sp, #56] @ 4-byte Reload
+; BE-NEON-NEXT:    mov r4, r0
+; BE-NEON-NEXT:    vmov r0, s0
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov s0, r4
+; BE-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vldr s0, [sp, #64] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov r2, s0
+; BE-NEON-NEXT:    vldr s0, [sp, #80] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-NEON-NEXT:    vmov r4, s0
+; BE-NEON-NEXT:    vldr s0, [sp, #76] @ 4-byte Reload
+; BE-NEON-NEXT:    vstr d16, [sp, #80] @ 8-byte Spill
+; BE-NEON-NEXT:    vmov r5, s0
+; BE-NEON-NEXT:    mov r0, r2
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    mov r0, r4
+; BE-NEON-NEXT:    mov r9, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov s0, r0
+; BE-NEON-NEXT:    vmov.32 d8[0], r7
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    mov r0, r5
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vldr s0, [sp, #88] @ 4-byte Reload
+; BE-NEON-NEXT:    mov r4, r0
+; BE-NEON-NEXT:    vmov.32 d8[1], r8
+; BE-NEON-NEXT:    vmov r7, s0
+; BE-NEON-NEXT:    vldr s0, [sp, #96] @ 4-byte Reload
+; BE-NEON-NEXT:    vstr d8, [sp, #88] @ 8-byte Spill
+; BE-NEON-NEXT:    vmov r0, s0
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vldr s0, [sp, #104] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov s19, r0
+; BE-NEON-NEXT:    vmov.32 d12[1], r6
+; BE-NEON-NEXT:    vmov r5, s0
+; BE-NEON-NEXT:    vldr s0, [sp, #112] @ 4-byte Reload
+; BE-NEON-NEXT:    vstr d12, [sp, #104] @ 8-byte Spill
+; BE-NEON-NEXT:    vmov r0, s0
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov.f32 s0, s19
+; BE-NEON-NEXT:    vmov s30, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s30
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    vmov s17, r4
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    mov r0, r5
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    vmov s30, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s30
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    str r1, [sp, #76] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.32 d12[1], r4
+; BE-NEON-NEXT:    vstr d16, [sp, #64] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d12, [sp, #112] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    mov r0, r7
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    vmov.32 d9[1], r6
+; BE-NEON-NEXT:    vstr d16, [sp, #56] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d9, [sp, #96] @ 8-byte Spill
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vldr s0, [sp, #120] @ 4-byte Reload
+; BE-NEON-NEXT:    mov r5, r0
+; BE-NEON-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov r7, s0
+; BE-NEON-NEXT:    vldr s0, [sp, #132] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-NEON-NEXT:    vmov r0, s0
+; BE-NEON-NEXT:    vstr d10, [sp, #120] @ 8-byte Spill
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vldr s0, [sp, #136] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov s26, r0
+; BE-NEON-NEXT:    vmov.32 d11[1], r9
+; BE-NEON-NEXT:    vmov r4, s0
+; BE-NEON-NEXT:    vldr s0, [sp, #144] @ 4-byte Reload
+; BE-NEON-NEXT:    vstr d11, [sp, #136] @ 8-byte Spill
+; BE-NEON-NEXT:    vmov r0, s0
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov.f32 s0, s26
+; BE-NEON-NEXT:    vmov s22, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s22
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    vmov s24, r5
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    mov r0, r4
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov.f32 s0, s24
+; BE-NEON-NEXT:    vmov s22, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s22
+; BE-NEON-NEXT:    mov r9, r1
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    vmov.32 d14[1], r5
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    mov r0, r7
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vldr s0, [sp, #148] @ 4-byte Reload
+; BE-NEON-NEXT:    mov r7, r0
+; BE-NEON-NEXT:    vmov.32 d13[1], r6
+; BE-NEON-NEXT:    vmov r0, s0
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vldr s0, [sp, #152] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov s20, r0
+; BE-NEON-NEXT:    vmov.32 d11[1], r5
+; BE-NEON-NEXT:    vmov r4, s0
+; BE-NEON-NEXT:    vldr s0, [sp, #156] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov r0, s0
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov.f32 s0, s20
+; BE-NEON-NEXT:    vmov s16, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    vmov s18, r7
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    mov r0, r4
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    bl __aeabi_h2f
+; BE-NEON-NEXT:    vmov.f32 s0, s18
+; BE-NEON-NEXT:    vmov s16, r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    vmov.32 d15[1], r5
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vldr d16, [sp, #160] @ 8-byte Reload
+; BE-NEON-NEXT:    vldr d20, [sp, #136] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d19, d14
+; BE-NEON-NEXT:    vrev64.32 d31, d16
+; BE-NEON-NEXT:    vldr d16, [sp, #168] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d18, d20
+; BE-NEON-NEXT:    vldr d20, [sp, #120] @ 8-byte Reload
+; BE-NEON-NEXT:    vldr d22, [sp, #96] @ 8-byte Reload
+; BE-NEON-NEXT:    vmov.32 d28[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d21, d20
+; BE-NEON-NEXT:    vrev64.32 d30, d16
+; BE-NEON-NEXT:    vldr d16, [sp, #48] @ 8-byte Reload
+; BE-NEON-NEXT:    vldr d23, [sp, #64] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d20, d22
+; BE-NEON-NEXT:    vldr d22, [sp, #112] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d1, d16
+; BE-NEON-NEXT:    vldr d16, [sp, #80] @ 8-byte Reload
+; BE-NEON-NEXT:    vmov.32 d23[1], r0
+; BE-NEON-NEXT:    add r0, r10, #64
+; BE-NEON-NEXT:    vrev64.32 d25, d22
+; BE-NEON-NEXT:    vldr d22, [sp, #104] @ 8-byte Reload
+; BE-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-NEON-NEXT:    vrev64.32 d0, d16
+; BE-NEON-NEXT:    vmov.32 d28[1], r1
+; BE-NEON-NEXT:    vldr d29, [sp, #56] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d3, d15
+; BE-NEON-NEXT:    vrev64.32 d24, d22
+; BE-NEON-NEXT:    vldr d22, [sp, #88] @ 8-byte Reload
+; BE-NEON-NEXT:    vmov.32 d10[1], r6
+; BE-NEON-NEXT:    vrev64.32 d5, d23
+; BE-NEON-NEXT:    vst1.64 {d0, d1}, [r11:128]!
+; BE-NEON-NEXT:    vrev64.32 d2, d9
+; BE-NEON-NEXT:    vrev64.32 d27, d22
+; BE-NEON-NEXT:    vmov.32 d29[1], r8
+; BE-NEON-NEXT:    vrev64.32 d4, d28
+; BE-NEON-NEXT:    vst1.64 {d30, d31}, [r11:128]
+; BE-NEON-NEXT:    vst1.64 {d2, d3}, [r0:128]!
+; BE-NEON-NEXT:    vmov.32 d12[1], r9
+; BE-NEON-NEXT:    vrev64.32 d26, d10
+; BE-NEON-NEXT:    vst1.64 {d4, d5}, [r0:128]!
+; BE-NEON-NEXT:    vrev64.32 d23, d29
+; BE-NEON-NEXT:    vst1.64 {d26, d27}, [r0:128]!
+; BE-NEON-NEXT:    vrev64.32 d22, d12
+; BE-NEON-NEXT:    vst1.64 {d24, d25}, [r0:128]
+; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r10:128]!
+; BE-NEON-NEXT:    vst1.64 {d22, d23}, [r10:128]!
+; BE-NEON-NEXT:    vrev64.32 d17, d11
+; BE-NEON-NEXT:    vrev64.32 d16, d13
+; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r10:128]!
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]
+; BE-NEON-NEXT:    add sp, sp, #176
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    add sp, sp, #4
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x)
+  ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
+
+; llrint of <1 x float>: lowered to a single libcall to llrintf; the i64
+; result comes back in r0:r1 and is packed into d0. BE variants build the
+; result in d16 and add a vrev64.32 to fix lane order.
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+; LE-LABEL: llrint_v1i64_v1f32:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r11, lr}
+; LE-NEXT:    push {r11, lr}
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d0[0], r0
+; LE-NEXT:    vmov.32 d0[1], r1
+; LE-NEXT:    pop {r11, pc}
+;
+; LE-NEON-LABEL: llrint_v1i64_v1f32:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r11, lr}
+; LE-NEON-NEXT:    push {r11, lr}
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d0[0], r0
+; LE-NEON-NEXT:    vmov.32 d0[1], r1
+; LE-NEON-NEXT:    pop {r11, pc}
+;
+; BE-LABEL: llrint_v1i64_v1f32:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r11, lr}
+; BE-NEXT:    push {r11, lr}
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 d0, d16
+; BE-NEXT:    pop {r11, pc}
+;
+; BE-NEON-LABEL: llrint_v1i64_v1f32:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r11, lr}
+; BE-NEON-NEXT:    push {r11, lr}
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-NEON-NEXT:    vrev64.32 d0, d16
+; BE-NEON-NEXT:    pop {r11, pc}
+  %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
+  ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
+
+; llrint of <2 x float>: one llrintf libcall per lane. The source vector is
+; preserved in d8 across the first call; results are assembled in q5 and then
+; moved to q0 (BE variants use vrev64.32 instead of vorr to fix lane order).
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+; LE-LABEL: llrint_v2i64_v2f32:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, lr}
+; LE-NEXT:    push {r4, lr}
+; LE-NEXT:    .vsave {d10, d11}
+; LE-NEXT:    vpush {d10, d11}
+; LE-NEXT:    .vsave {d8}
+; LE-NEXT:    vpush {d8}
+; LE-NEXT:    vmov.f64 d8, d0
+; LE-NEXT:    vmov.f32 s0, s17
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s16
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    vmov.32 d11[1], r4
+; LE-NEXT:    vmov.32 d10[1], r1
+; LE-NEXT:    vorr q0, q5, q5
+; LE-NEXT:    vpop {d8}
+; LE-NEXT:    vpop {d10, d11}
+; LE-NEXT:    pop {r4, pc}
+;
+; LE-NEON-LABEL: llrint_v2i64_v2f32:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, lr}
+; LE-NEON-NEXT:    push {r4, lr}
+; LE-NEON-NEXT:    .vsave {d10, d11}
+; LE-NEON-NEXT:    vpush {d10, d11}
+; LE-NEON-NEXT:    .vsave {d8}
+; LE-NEON-NEXT:    vpush {d8}
+; LE-NEON-NEXT:    vmov.f64 d8, d0
+; LE-NEON-NEXT:    vmov.f32 s0, s17
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s16
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    vmov.32 d11[1], r4
+; LE-NEON-NEXT:    vmov.32 d10[1], r1
+; LE-NEON-NEXT:    vorr q0, q5, q5
+; LE-NEON-NEXT:    vpop {d8}
+; LE-NEON-NEXT:    vpop {d10, d11}
+; LE-NEON-NEXT:    pop {r4, pc}
+;
+; BE-LABEL: llrint_v2i64_v2f32:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, lr}
+; BE-NEXT:    push {r4, lr}
+; BE-NEXT:    .vsave {d10, d11}
+; BE-NEXT:    vpush {d10, d11}
+; BE-NEXT:    .vsave {d8}
+; BE-NEXT:    vpush {d8}
+; BE-NEXT:    vrev64.32 d8, d0
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    vmov.32 d10[1], r1
+; BE-NEXT:    vrev64.32 q0, q5
+; BE-NEXT:    vpop {d8}
+; BE-NEXT:    vpop {d10, d11}
+; BE-NEXT:    pop {r4, pc}
+;
+; BE-NEON-LABEL: llrint_v2i64_v2f32:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, lr}
+; BE-NEON-NEXT:    push {r4, lr}
+; BE-NEON-NEXT:    .vsave {d10, d11}
+; BE-NEON-NEXT:    vpush {d10, d11}
+; BE-NEON-NEXT:    .vsave {d8}
+; BE-NEON-NEXT:    vpush {d8}
+; BE-NEON-NEXT:    vrev64.32 d8, d0
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-NEON-NEXT:    vrev64.32 q0, q5
+; BE-NEON-NEXT:    vpop {d8}
+; BE-NEON-NEXT:    vpop {d10, d11}
+; BE-NEON-NEXT:    pop {r4, pc}
+  %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
+  ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
+
+; llrint of <4 x float>: four llrintf libcalls. High result halves (r1) are
+; kept in callee-saved GPRs (r4/r5/r6) across the remaining calls while the
+; low halves fill q6 and q4/q5; BE variants apply vrev64.32 per q-register
+; to restore lane order before returning in q0/q1.
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+; LE-LABEL: llrint_v4i64_v4f32:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, lr}
+; LE-NEXT:    push {r4, r5, r6, lr}
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; LE-NEXT:    vorr q5, q0, q0
+; LE-NEXT:    vmov.f32 s0, s23
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s20
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s21
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s22
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    vmov.32 d13[1], r6
+; LE-NEXT:    vmov.32 d9[1], r4
+; LE-NEXT:    vmov.32 d12[1], r5
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vorr q0, q6, q6
+; LE-NEXT:    vorr q1, q4, q4
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; LE-NEXT:    pop {r4, r5, r6, pc}
+;
+; LE-NEON-LABEL: llrint_v4i64_v4f32:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, lr}
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; LE-NEON-NEXT:    vorr q5, q0, q0
+; LE-NEON-NEXT:    vmov.f32 s0, s23
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s20
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s21
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s22
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    vmov.32 d13[1], r6
+; LE-NEON-NEXT:    vmov.32 d9[1], r4
+; LE-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-NEON-NEXT:    vorr q0, q6, q6
+; LE-NEON-NEXT:    vorr q1, q4, q4
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; LE-NEON-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-LABEL: llrint_v4i64_v4f32:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, lr}
+; BE-NEXT:    push {r4, r5, r6, lr}
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; BE-NEXT:    vrev64.32 d8, d1
+; BE-NEXT:    vrev64.32 d9, d0
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s18
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s19
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    vmov.32 d13[1], r6
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    vmov.32 d12[1], r5
+; BE-NEXT:    vmov.32 d10[1], r1
+; BE-NEXT:    vrev64.32 q0, q6
+; BE-NEXT:    vrev64.32 q1, q5
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; BE-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-NEON-LABEL: llrint_v4i64_v4f32:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, lr}
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; BE-NEON-NEXT:    vrev64.32 d8, d1
+; BE-NEON-NEXT:    vrev64.32 d9, d0
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s18
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s19
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    vmov.32 d13[1], r6
+; BE-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-NEON-NEXT:    vmov.32 d12[1], r5
+; BE-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-NEON-NEXT:    vrev64.32 q0, q6
+; BE-NEON-NEXT:    vrev64.32 q1, q5
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; BE-NEON-NEXT:    pop {r4, r5, r6, pc}
+  %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
+  ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
+
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+; LE-LABEL: llrint_v8i64_v8f32:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    .pad #40
+; LE-NEXT:    sub sp, sp, #40
+; LE-NEXT:    vorr q6, q1, q1
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    vorr q7, q0, q0
+; LE-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-NEXT:    vmov.f32 s0, s27
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s24
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s25
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vorr q6, q7, q7
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    vmov.f32 s0, s26
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s27
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s24
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s1
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s2
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    vmov.32 d13[1], r6
+; LE-NEXT:    vmov.32 d15[1], r4
+; LE-NEXT:    vmov.32 d11[1], r10
+; LE-NEXT:    vmov.32 d9[1], r8
+; LE-NEXT:    vmov.32 d12[1], r5
+; LE-NEXT:    vmov.32 d14[1], r7
+; LE-NEXT:    vorr q0, q6, q6
+; LE-NEXT:    vmov.32 d10[1], r9
+; LE-NEXT:    vorr q1, q7, q7
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vorr q2, q5, q5
+; LE-NEXT:    vorr q3, q4, q4
+; LE-NEXT:    add sp, sp, #40
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-NEON-LABEL: llrint_v8i64_v8f32:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    .pad #40
+; LE-NEON-NEXT:    sub sp, sp, #40
+; LE-NEON-NEXT:    vorr q6, q1, q1
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    vorr q7, q0, q0
+; LE-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-NEON-NEXT:    vmov.f32 s0, s27
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s24
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s25
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vorr q6, q7, q7
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    mov r10, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    vmov.f32 s0, s26
+; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s27
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s24
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.f32 s0, s1
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.f32 s0, s2
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    vmov.32 d13[1], r6
+; LE-NEON-NEXT:    vmov.32 d15[1], r4
+; LE-NEON-NEXT:    vmov.32 d11[1], r10
+; LE-NEON-NEXT:    vmov.32 d9[1], r8
+; LE-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-NEON-NEXT:    vmov.32 d14[1], r7
+; LE-NEON-NEXT:    vorr q0, q6, q6
+; LE-NEON-NEXT:    vmov.32 d10[1], r9
+; LE-NEON-NEXT:    vorr q1, q7, q7
+; LE-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-NEON-NEXT:    vorr q2, q5, q5
+; LE-NEON-NEXT:    vorr q3, q4, q4
+; LE-NEON-NEXT:    add sp, sp, #40
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-LABEL: llrint_v8i64_v8f32:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    .pad #32
+; BE-NEXT:    sub sp, sp, #32
+; BE-NEXT:    vorr q4, q1, q1
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vorr q5, q0, q0
+; BE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEXT:    vrev64.32 d12, d8
+; BE-NEXT:    vmov.f32 s0, s25
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s24
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vrev64.32 d0, d11
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vrev64.32 d8, d9
+; BE-NEXT:    vorr d9, d0, d0
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    mov r10, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vmov.f32 s0, s19
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 d8, d16
+; BE-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vldr d0, [sp, #8] @ 8-byte Reload
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    vmov.f32 s0, s1
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vldr d0, [sp, #24] @ 8-byte Reload
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    @ kill: def $s0 killed $s0 killed $d0
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    vmov.32 d9[1], r6
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    vmov.32 d15[1], r8
+; BE-NEXT:    vmov.32 d13[1], r7
+; BE-NEXT:    vmov.32 d8[1], r5
+; BE-NEXT:    vmov.32 d10[1], r10
+; BE-NEXT:    vmov.32 d14[1], r9
+; BE-NEXT:    vmov.32 d12[1], r1
+; BE-NEXT:    vrev64.32 q0, q4
+; BE-NEXT:    vrev64.32 q1, q5
+; BE-NEXT:    vrev64.32 q2, q7
+; BE-NEXT:    vrev64.32 q3, q6
+; BE-NEXT:    add sp, sp, #32
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-NEON-LABEL: llrint_v8i64_v8f32:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    .pad #32
+; BE-NEON-NEXT:    sub sp, sp, #32
+; BE-NEON-NEXT:    vorr q4, q1, q1
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    vorr q5, q0, q0
+; BE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEON-NEXT:    vrev64.32 d12, d8
+; BE-NEON-NEXT:    vmov.f32 s0, s25
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s24
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vrev64.32 d0, d11
+; BE-NEON-NEXT:    mov r9, r1
+; BE-NEON-NEXT:    vrev64.32 d8, d9
+; BE-NEON-NEXT:    vorr d9, d0, d0
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    mov r10, r1
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    vmov.f32 s0, s19
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d8, d16
+; BE-NEON-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vldr d0, [sp, #8] @ 8-byte Reload
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    vmov.f32 s0, s1
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vldr d0, [sp, #24] @ 8-byte Reload
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    @ kill: def $s0 killed $s0 killed $d0
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    vmov.32 d9[1], r6
+; BE-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-NEON-NEXT:    vmov.32 d15[1], r8
+; BE-NEON-NEXT:    vmov.32 d13[1], r7
+; BE-NEON-NEXT:    vmov.32 d8[1], r5
+; BE-NEON-NEXT:    vmov.32 d10[1], r10
+; BE-NEON-NEXT:    vmov.32 d14[1], r9
+; BE-NEON-NEXT:    vmov.32 d12[1], r1
+; BE-NEON-NEXT:    vrev64.32 q0, q4
+; BE-NEON-NEXT:    vrev64.32 q1, q5
+; BE-NEON-NEXT:    vrev64.32 q2, q7
+; BE-NEON-NEXT:    vrev64.32 q3, q6
+; BE-NEON-NEXT:    add sp, sp, #32
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+  %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x)
+  ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
+
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+; LE-LABEL: llrint_v16i64_v16f32:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    .pad #4
+; LE-NEXT:    sub sp, sp, #4
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    .pad #160
+; LE-NEXT:    sub sp, sp, #160
+; LE-NEXT:    add lr, sp, #112
+; LE-NEXT:    vorr q5, q3, q3
+; LE-NEXT:    vorr q6, q0, q0
+; LE-NEXT:    mov r4, r0
+; LE-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #48
+; LE-NEXT:    vorr q7, q1, q1
+; LE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEXT:    vmov.f32 s0, s23
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s24
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    vmov.32 d17[0], r0
+; LE-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s25
+; LE-NEXT:    str r1, [sp, #84] @ 4-byte Spill
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s28
+; LE-NEXT:    add lr, sp, #128
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s29
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s30
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s31
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #112
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s29
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s22
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    vmov.32 d17[0], r0
+; LE-NEXT:    mov r11, r1
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    vmov.32 d13[1], r7
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    vmov.f32 s0, s21
+; LE-NEXT:    vmov.32 d12[1], r5
+; LE-NEXT:    str r1, [sp, #40] @ 4-byte Spill
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d16[0], r0
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #88
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s20
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vmov.32 d9[1], r6
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s31
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d8[1], r9
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #64
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #128
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #48
+; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s27
+; LE-NEXT:    vmov.32 d11[1], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s26
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; LE-NEXT:    add lr, sp, #128
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d10[1], r0
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d17[1], r0
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #112
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s20
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    vmov.f32 s0, s22
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d16[0], r0
+; LE-NEXT:    vmov.32 d17[1], r11
+; LE-NEXT:    vorr q6, q8, q8
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #128
+; LE-NEXT:    vmov.32 d9[1], r9
+; LE-NEXT:    vmov.32 d12[1], r6
+; LE-NEXT:    vmov.32 d19[1], r10
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vmov.32 d16[1], r0
+; LE-NEXT:    add r0, r4, #64
+; LE-NEXT:    vmov.32 d18[1], r8
+; LE-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT:    vmov.32 d15[1], r7
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #64
+; LE-NEXT:    vmov.32 d14[1], r5
+; LE-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-NEXT:    vst1.64 {d14, d15}, [r4:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #88
+; LE-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vst1.64 {d16, d17}, [r4:128]
+; LE-NEXT:    add sp, sp, #160
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    add sp, sp, #4
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v16i64_v16f32:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    .pad #4
+; LE-NEON-NEXT:    sub sp, sp, #4
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    .pad #160
+; LE-NEON-NEXT:    sub sp, sp, #160
+; LE-NEON-NEXT:    add lr, sp, #112
+; LE-NEON-NEXT:    vorr q5, q3, q3
+; LE-NEON-NEXT:    vorr q6, q0, q0
+; LE-NEON-NEXT:    mov r4, r0
+; LE-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #48
+; LE-NEON-NEXT:    vorr q7, q1, q1
+; LE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEON-NEXT:    vmov.f32 s0, s23
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s24
+; LE-NEON-NEXT:    add lr, sp, #144
+; LE-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-NEON-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s25
+; LE-NEON-NEXT:    str r1, [sp, #84] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s28
+; LE-NEON-NEXT:    add lr, sp, #128
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s29
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s30
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s31
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #112
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.f32 s0, s29
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s22
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-NEON-NEXT:    mov r11, r1
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    vmov.32 d13[1], r7
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #144
+; LE-NEON-NEXT:    vmov.f32 s0, s21
+; LE-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-NEON-NEXT:    str r1, [sp, #40] @ 4-byte Spill
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d16[0], r0
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #88
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s20
+; LE-NEON-NEXT:    mov r10, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    vmov.32 d9[1], r6
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s31
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d8[1], r9
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #64
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #128
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #48
+; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.f32 s0, s27
+; LE-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s26
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #128
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d10[1], r0
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    add lr, sp, #144
+; LE-NEON-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d17[1], r0
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #112
+; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.f32 s0, s20
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    vmov.f32 s0, s22
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d16[0], r0
+; LE-NEON-NEXT:    vmov.32 d17[1], r11
+; LE-NEON-NEXT:    vorr q6, q8, q8
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #144
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #128
+; LE-NEON-NEXT:    vmov.32 d9[1], r9
+; LE-NEON-NEXT:    vmov.32 d12[1], r6
+; LE-NEON-NEXT:    vmov.32 d19[1], r10
+; LE-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-NEON-NEXT:    vmov.32 d16[1], r0
+; LE-NEON-NEXT:    add r0, r4, #64
+; LE-NEON-NEXT:    vmov.32 d18[1], r8
+; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEON-NEXT:    vmov.32 d15[1], r7
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #64
+; LE-NEON-NEXT:    vmov.32 d14[1], r5
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r4:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #88
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]
+; LE-NEON-NEXT:    add sp, sp, #160
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    add sp, sp, #4
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v16i64_v16f32:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    .pad #4
+; BE-NEXT:    sub sp, sp, #4
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    .pad #144
+; BE-NEXT:    sub sp, sp, #144
+; BE-NEXT:    vorr q6, q3, q3
+; BE-NEXT:    add lr, sp, #112
+; BE-NEXT:    vorr q7, q0, q0
+; BE-NEXT:    mov r4, r0
+; BE-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    vrev64.32 d8, d13
+; BE-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    str r1, [sp, #88] @ 4-byte Spill
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vrev64.32 d8, d14
+; BE-NEXT:    add lr, sp, #128
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    str r1, [sp, #92] @ 4-byte Spill
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    vrev64.32 d9, d12
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    vstr d9, [sp, #64] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s19
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    str r1, [sp, #84] @ 4-byte Spill
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    vrev64.32 d9, d15
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s18
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s19
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vldr d0, [sp, #64] @ 8-byte Reload
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    @ kill: def $s0 killed $s0 killed $d0
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add lr, sp, #40
+; BE-NEXT:    str r1, [sp, #60] @ 4-byte Spill
+; BE-NEXT:    vmov.32 d15[1], r7
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 d8, d16
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    vmov.32 d14[1], r5
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    mov r10, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    mov r11, r1
+; BE-NEXT:    vmov.32 d13[1], r6
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 d8, d17
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    vmov.32 d12[1], r9
+; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #112
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #128
+; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 d8, d16
+; BE-NEXT:    vmov.32 d11[1], r0
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    ldr r0, [sp, #92] @ 4-byte Reload
+; BE-NEXT:    add lr, sp, #128
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d10[1], r0
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #112
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #40
+; BE-NEXT:    vrev64.32 d8, d17
+; BE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    vmov.32 d13[1], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    ldr r0, [sp, #60] @ 4-byte Reload
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d12[1], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add r0, r4, #64
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vmov.32 d17[1], r10
+; BE-NEXT:    vmov.32 d16[1], r11
+; BE-NEXT:    vorr q12, q8, q8
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #128
+; BE-NEXT:    vmov.32 d15[1], r7
+; BE-NEXT:    vmov.32 d11[1], r6
+; BE-NEXT:    vmov.32 d14[1], r5
+; BE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    vmov.32 d10[1], r1
+; BE-NEXT:    vmov.32 d17[1], r8
+; BE-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    vmov.32 d16[1], r9
+; BE-NEXT:    vrev64.32 q14, q7
+; BE-NEXT:    vorr q13, q8, q8
+; BE-NEXT:    vrev64.32 q15, q5
+; BE-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 q8, q6
+; BE-NEXT:    vst1.64 {d28, d29}, [r0:128]!
+; BE-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-NEXT:    vrev64.32 q9, q9
+; BE-NEXT:    vrev64.32 q10, q10
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    vrev64.32 q11, q11
+; BE-NEXT:    vrev64.32 q12, q12
+; BE-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; BE-NEXT:    vst1.64 {d20, d21}, [r4:128]!
+; BE-NEXT:    vst1.64 {d22, d23}, [r4:128]!
+; BE-NEXT:    vrev64.32 q13, q13
+; BE-NEXT:    vst1.64 {d24, d25}, [r4:128]!
+; BE-NEXT:    vst1.64 {d26, d27}, [r4:128]
+; BE-NEXT:    add sp, sp, #144
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    add sp, sp, #4
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v16i64_v16f32:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    .pad #4
+; BE-NEON-NEXT:    sub sp, sp, #4
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    .pad #144
+; BE-NEON-NEXT:    sub sp, sp, #144
+; BE-NEON-NEXT:    vorr q6, q3, q3
+; BE-NEON-NEXT:    add lr, sp, #112
+; BE-NEON-NEXT:    vorr q7, q0, q0
+; BE-NEON-NEXT:    mov r4, r0
+; BE-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #96
+; BE-NEON-NEXT:    vrev64.32 d8, d13
+; BE-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    str r1, [sp, #88] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vrev64.32 d8, d14
+; BE-NEON-NEXT:    add lr, sp, #128
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    str r1, [sp, #92] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    vrev64.32 d9, d12
+; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT:    vstr d9, [sp, #64] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s19
+; BE-NEON-NEXT:    mov r9, r1
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    str r1, [sp, #84] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    vrev64.32 d9, d15
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s18
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s19
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vldr d0, [sp, #64] @ 8-byte Reload
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    @ kill: def $s0 killed $s0 killed $d0
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    add lr, sp, #40
+; BE-NEON-NEXT:    str r1, [sp, #60] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.32 d15[1], r7
+; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #96
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d8, d16
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    vmov.32 d14[1], r5
+; BE-NEON-NEXT:    add lr, sp, #64
+; BE-NEON-NEXT:    mov r10, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    add lr, sp, #24
+; BE-NEON-NEXT:    mov r11, r1
+; BE-NEON-NEXT:    vmov.32 d13[1], r6
+; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #96
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d8, d17
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    vmov.32 d12[1], r9
+; BE-NEON-NEXT:    add lr, sp, #96
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; BE-NEON-NEXT:    mov r9, r1
+; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #112
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #128
+; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d8, d16
+; BE-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #92] @ 4-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #128
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #112
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #40
+; BE-NEON-NEXT:    vrev64.32 d8, d17
+; BE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #60] @ 4-byte Reload
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d12[1], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #24
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    add r0, r4, #64
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    vmov.32 d17[1], r10
+; BE-NEON-NEXT:    vmov.32 d16[1], r11
+; BE-NEON-NEXT:    vorr q12, q8, q8
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #128
+; BE-NEON-NEXT:    vmov.32 d15[1], r7
+; BE-NEON-NEXT:    vmov.32 d11[1], r6
+; BE-NEON-NEXT:    vmov.32 d14[1], r5
+; BE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #96
+; BE-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-NEON-NEXT:    vmov.32 d17[1], r8
+; BE-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #64
+; BE-NEON-NEXT:    vmov.32 d16[1], r9
+; BE-NEON-NEXT:    vrev64.32 q14, q7
+; BE-NEON-NEXT:    vorr q13, q8, q8
+; BE-NEON-NEXT:    vrev64.32 q15, q5
+; BE-NEON-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-NEON-NEXT:    vrev64.32 q8, q6
+; BE-NEON-NEXT:    vst1.64 {d28, d29}, [r0:128]!
+; BE-NEON-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-NEON-NEXT:    vrev64.32 q9, q9
+; BE-NEON-NEXT:    vrev64.32 q10, q10
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT:    vrev64.32 q11, q11
+; BE-NEON-NEXT:    vrev64.32 q12, q12
+; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r4:128]!
+; BE-NEON-NEXT:    vst1.64 {d22, d23}, [r4:128]!
+; BE-NEON-NEXT:    vrev64.32 q13, q13
+; BE-NEON-NEXT:    vst1.64 {d24, d25}, [r4:128]!
+; BE-NEON-NEXT:    vst1.64 {d26, d27}, [r4:128]
+; BE-NEON-NEXT:    add sp, sp, #144
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    add sp, sp, #4
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
+  ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
+
+define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
+; LE-LABEL: llrint_v32i64_v32f32:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    .pad #4
+; LE-NEXT:    sub sp, sp, #4
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    .pad #184
+; LE-NEXT:    sub sp, sp, #184
+; LE-NEXT:    add lr, sp, #152
+; LE-NEXT:    vorr q7, q3, q3
+; LE-NEXT:    vorr q4, q2, q2
+; LE-NEXT:    mov r5, r0
+; LE-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #88
+; LE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEXT:    vmov.f32 s0, s3
+; LE-NEXT:    str r0, [sp, #68] @ 4-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s18
+; LE-NEXT:    add lr, sp, #168
+; LE-NEXT:    vmov.32 d17[0], r0
+; LE-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s16
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s17
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s19
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s31
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s30
+; LE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    vmov.32 d11[1], r7
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s29
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    add lr, sp, #104
+; LE-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; LE-NEXT:    vmov.32 d13[1], r4
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    add r0, sp, #320
+; LE-NEXT:    add lr, sp, #120
+; LE-NEXT:    mov r11, r1
+; LE-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-NEXT:    add r0, sp, #304
+; LE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #72
+; LE-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-NEXT:    add r0, sp, #336
+; LE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #32
+; LE-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-NEXT:    add r0, sp, #288
+; LE-NEXT:    vmov.32 d12[1], r6
+; LE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #48
+; LE-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-NEXT:    vmov.32 d10[1], r8
+; LE-NEXT:    add r8, r5, #64
+; LE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #152
+; LE-NEXT:    vst1.64 {d12, d13}, [r8:128]!
+; LE-NEXT:    vst1.64 {d10, d11}, [r8:128]!
+; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s27
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s28
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s26
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    vmov.32 d11[1], r4
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    add lr, sp, #136
+; LE-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #168
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #88
+; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s26
+; LE-NEXT:    vmov.32 d11[1], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s25
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    add lr, sp, #168
+; LE-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #104
+; LE-NEXT:    vorr q5, q6, q6
+; LE-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d15[1], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s20
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; LE-NEXT:    add lr, sp, #104
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d14[1], r0
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    add lr, sp, #152
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vorr q7, q6, q6
+; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d9[1], r11
+; LE-NEXT:    vmov.f32 s0, s25
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s24
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    vmov.32 d8[1], r9
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #136
+; LE-NEXT:    mov r11, r1
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d16[1], r10
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #120
+; LE-NEXT:    vst1.64 {d8, d9}, [r8:128]!
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s1
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #152
+; LE-NEXT:    vmov.32 d17[0], r0
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #104
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #72
+; LE-NEXT:    vst1.64 {d16, d17}, [r8:128]
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s19
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #168
+; LE-NEXT:    vmov.f32 s0, s18
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d16[1], r7
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s17
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    add lr, sp, #104
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d15[1], r4
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s16
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vmov.32 d14[1], r6
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    add lr, sp, #88
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d11[1], r5
+; LE-NEXT:    vmov.32 d10[1], r11
+; LE-NEXT:    ldr r11, [sp, #68] @ 4-byte Reload
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #16
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #32
+; LE-NEXT:    vst1.64 {d14, d15}, [r11:128]!
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s23
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #152
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #120
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    @ kill: def $s0 killed $s0 killed $q0
+; LE-NEXT:    vmov.32 d13[1], r10
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s22
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    add lr, sp, #152
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #104
+; LE-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d15[1], r8
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s21
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    add lr, sp, #72
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d14[1], r7
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #104
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s20
+; LE-NEXT:    add lr, sp, #88
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d13[1], r9
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    add lr, sp, #32
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d12[1], r6
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #88
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #120
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s19
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s18
+; LE-NEXT:    add lr, sp, #72
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d13[1], r4
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #152
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d16[1], r5
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #168
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #48
+; LE-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s21
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s20
+; LE-NEXT:    vmov.32 d12[1], r8
+; LE-NEXT:    add lr, sp, #72
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s23
+; LE-NEXT:    add lr, sp, #32
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d13[1], r7
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #48
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s2
+; LE-NEXT:    vmov.32 d12[1], r9
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #16
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #136
+; LE-NEXT:    vmov.32 d11[1], r7
+; LE-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #152
+; LE-NEXT:    vmov.32 d15[1], r10
+; LE-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; LE-NEXT:    vmov.32 d10[1], r1
+; LE-NEXT:    ldr r1, [sp, #68] @ 4-byte Reload
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add r0, r1, #192
+; LE-NEXT:    add lr, sp, #72
+; LE-NEXT:    vmov.32 d14[1], r4
+; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-NEXT:    vmov.32 d9[1], r5
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #88
+; LE-NEXT:    vmov.32 d8[1], r6
+; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT:    add r0, r1, #128
+; LE-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #104
+; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT:    add sp, sp, #184
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    add sp, sp, #4
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v32i64_v32f32:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    .pad #4
+; LE-NEON-NEXT:    sub sp, sp, #4
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    .pad #184
+; LE-NEON-NEXT:    sub sp, sp, #184
+; LE-NEON-NEXT:    add lr, sp, #152
+; LE-NEON-NEXT:    vorr q7, q3, q3
+; LE-NEON-NEXT:    vorr q4, q2, q2
+; LE-NEON-NEXT:    mov r5, r0
+; LE-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #88
+; LE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEON-NEXT:    vmov.f32 s0, s3
+; LE-NEON-NEXT:    str r0, [sp, #68] @ 4-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s18
+; LE-NEON-NEXT:    add lr, sp, #168
+; LE-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-NEON-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s16
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s17
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s19
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s31
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s30
+; LE-NEON-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    vmov.32 d11[1], r7
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s29
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    add lr, sp, #104
+; LE-NEON-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.32 d13[1], r4
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    add r0, sp, #320
+; LE-NEON-NEXT:    add lr, sp, #120
+; LE-NEON-NEXT:    mov r11, r1
+; LE-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-NEON-NEXT:    add r0, sp, #304
+; LE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #72
+; LE-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-NEON-NEXT:    add r0, sp, #336
+; LE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #32
+; LE-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-NEON-NEXT:    add r0, sp, #288
+; LE-NEON-NEXT:    vmov.32 d12[1], r6
+; LE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #48
+; LE-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-NEON-NEXT:    vmov.32 d10[1], r8
+; LE-NEON-NEXT:    add r8, r5, #64
+; LE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #152
+; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r8:128]!
+; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r8:128]!
+; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.f32 s0, s27
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s28
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s26
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    vmov.32 d11[1], r4
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    add lr, sp, #136
+; LE-NEON-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r10, r1
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #168
+; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #88
+; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.f32 s0, s26
+; LE-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s25
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    add lr, sp, #168
+; LE-NEON-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #104
+; LE-NEON-NEXT:    vorr q5, q6, q6
+; LE-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d15[1], r0
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s20
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #104
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d14[1], r0
+; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    add lr, sp, #152
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vorr q7, q6, q6
+; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d9[1], r11
+; LE-NEON-NEXT:    vmov.f32 s0, s25
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s24
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    vmov.32 d8[1], r9
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #136
+; LE-NEON-NEXT:    mov r11, r1
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d16[1], r10
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #120
+; LE-NEON-NEXT:    vst1.64 {d8, d9}, [r8:128]!
+; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.f32 s0, s1
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #152
+; LE-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-NEON-NEXT:    mov r10, r1
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #104
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #72
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.f32 s0, s19
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #168
+; LE-NEON-NEXT:    vmov.f32 s0, s18
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d16[1], r7
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s17
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    add lr, sp, #104
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d15[1], r4
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s16
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    vmov.32 d14[1], r6
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    add lr, sp, #88
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d11[1], r5
+; LE-NEON-NEXT:    vmov.32 d10[1], r11
+; LE-NEON-NEXT:    ldr r11, [sp, #68] @ 4-byte Reload
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #16
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #32
+; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r11:128]!
+; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.f32 s0, s23
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #152
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #120
+; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT:    @ kill: def $s0 killed $s0 killed $q0
+; LE-NEON-NEXT:    vmov.32 d13[1], r10
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s22
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    add lr, sp, #152
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #104
+; LE-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d15[1], r8
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s21
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    add lr, sp, #72
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d14[1], r7
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #104
+; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s20
+; LE-NEON-NEXT:    add lr, sp, #88
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d13[1], r9
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    add lr, sp, #32
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d12[1], r6
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #88
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #120
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.f32 s0, s19
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s18
+; LE-NEON-NEXT:    add lr, sp, #72
+; LE-NEON-NEXT:    mov r10, r1
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d13[1], r4
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #152
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d16[1], r5
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #168
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #48
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.f32 s0, s21
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s20
+; LE-NEON-NEXT:    vmov.32 d12[1], r8
+; LE-NEON-NEXT:    add lr, sp, #72
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    vmov.f32 s0, s23
+; LE-NEON-NEXT:    add lr, sp, #32
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d13[1], r7
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #48
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.f32 s0, s2
+; LE-NEON-NEXT:    vmov.32 d12[1], r9
+; LE-NEON-NEXT:    bl llrintf
+; LE-NEON-NEXT:    add lr, sp, #16
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #136
+; LE-NEON-NEXT:    vmov.32 d11[1], r7
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #152
+; LE-NEON-NEXT:    vmov.32 d15[1], r10
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; LE-NEON-NEXT:    vmov.32 d10[1], r1
+; LE-NEON-NEXT:    ldr r1, [sp, #68] @ 4-byte Reload
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add r0, r1, #192
+; LE-NEON-NEXT:    add lr, sp, #72
+; LE-NEON-NEXT:    vmov.32 d14[1], r4
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-NEON-NEXT:    vmov.32 d9[1], r5
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #88
+; LE-NEON-NEXT:    vmov.32 d8[1], r6
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEON-NEXT:    add r0, r1, #128
+; LE-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #104
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEON-NEXT:    add sp, sp, #184
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    add sp, sp, #4
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v32i64_v32f32:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    .pad #4
+; BE-NEXT:    sub sp, sp, #4
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    .pad #256
+; BE-NEXT:    sub sp, sp, #256
+; BE-NEXT:    add lr, sp, #208
+; BE-NEXT:    str r0, [sp, #156] @ 4-byte Spill
+; BE-NEXT:    add r0, sp, #408
+; BE-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #120
+; BE-NEXT:    vld1.64 {d10, d11}, [r0]
+; BE-NEXT:    add r0, sp, #392
+; BE-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #160
+; BE-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #176
+; BE-NEXT:    vrev64.32 d8, d10
+; BE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #136
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    vld1.64 {d12, d13}, [r0]
+; BE-NEXT:    add r0, sp, #360
+; BE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #192
+; BE-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEXT:    add r0, sp, #376
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #40
+; BE-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    str r1, [sp, #88] @ 4-byte Spill
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vrev64.32 d9, d11
+; BE-NEXT:    add lr, sp, #240
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    str r1, [sp, #104] @ 4-byte Spill
+; BE-NEXT:    vmov.f32 s0, s18
+; BE-NEXT:    vrev64.32 d8, d13
+; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s19
+; BE-NEXT:    add lr, sp, #192
+; BE-NEXT:    str r1, [sp, #72] @ 4-byte Spill
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 d10, d16
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s20
+; BE-NEXT:    add lr, sp, #224
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s21
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vmov.32 d15[1], r6
+; BE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #192
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 d8, d17
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    vmov.32 d14[1], r7
+; BE-NEXT:    add lr, sp, #56
+; BE-NEXT:    mov r10, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add lr, sp, #192
+; BE-NEXT:    mov r11, r1
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #40
+; BE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #224
+; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 d8, d12
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    vmov.32 d10[1], r5
+; BE-NEXT:    add lr, sp, #224
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vrev64.32 d8, d13
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #240
+; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT:    vmov.32 d11[1], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
+; BE-NEXT:    add lr, sp, #240
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d10[1], r0
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #136
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 d8, d16
+; BE-NEXT:    vmov.32 d13[1], r0
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    vmov.32 d12[1], r9
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #192
+; BE-NEXT:    vmov.32 d15[1], r4
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vmov.32 d17[1], r10
+; BE-NEXT:    vmov.32 d16[1], r11
+; BE-NEXT:    vorr q9, q8, q8
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #192
+; BE-NEXT:    vmov.32 d17[1], r8
+; BE-NEXT:    vmov.32 d16[1], r5
+; BE-NEXT:    vorr q10, q8, q8
+; BE-NEXT:    vrev64.32 q8, q6
+; BE-NEXT:    vmov.32 d14[1], r6
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #240
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    vrev64.32 q8, q8
+; BE-NEXT:    vmov.32 d11[1], r7
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #224
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vmov.32 d10[1], r1
+; BE-NEXT:    vrev64.32 q8, q8
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #56
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #136
+; BE-NEXT:    vrev64.32 q8, q8
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #104
+; BE-NEXT:    vrev64.32 q8, q9
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #88
+; BE-NEXT:    vrev64.32 q8, q10
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #72
+; BE-NEXT:    vrev64.32 q8, q7
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #208
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #56
+; BE-NEXT:    vrev64.32 d8, d17
+; BE-NEXT:    vrev64.32 q8, q5
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #120
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT:    vmov.32 d13[1], r4
+; BE-NEXT:    vrev64.32 d8, d10
+; BE-NEXT:    vmov.32 d12[1], r1
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    vrev64.32 q6, q6
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    vmov.32 d15[1], r1
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    ldr r6, [sp, #156] @ 4-byte Reload
+; BE-NEXT:    vrev64.32 d8, d11
+; BE-NEXT:    add r5, r6, #64
+; BE-NEXT:    vmov.32 d14[1], r1
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    vrev64.32 q8, q7
+; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    vmov.32 d15[1], r1
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #208
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-NEXT:    vmov.32 d14[1], r1
+; BE-NEXT:    vrev64.32 d8, d18
+; BE-NEXT:    vrev64.32 q8, q7
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    add lr, sp, #160
+; BE-NEXT:    vmov.32 d15[1], r4
+; BE-NEXT:    vmov.32 d14[1], r1
+; BE-NEXT:    vrev64.32 q8, q7
+; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 d8, d11
+; BE-NEXT:    vst1.64 {d12, d13}, [r5:128]
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    add lr, sp, #208
+; BE-NEXT:    vmov.32 d13[1], r4
+; BE-NEXT:    vmov.32 d12[1], r1
+; BE-NEXT:    vrev64.32 q8, q6
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #176
+; BE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 d8, d12
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    vmov.32 d15[1], r1
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    mov r5, r6
+; BE-NEXT:    vrev64.32 d8, d13
+; BE-NEXT:    vmov.32 d14[1], r1
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    vrev64.32 q8, q7
+; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    vmov.32 d15[1], r1
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    vrev64.32 d8, d10
+; BE-NEXT:    vmov.32 d14[1], r1
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    vrev64.32 q8, q7
+; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    add lr, sp, #208
+; BE-NEXT:    add r0, r6, #192
+; BE-NEXT:    vmov.32 d15[1], r4
+; BE-NEXT:    vmov.32 d14[1], r1
+; BE-NEXT:    vrev64.32 q8, q7
+; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #56
+; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #192
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #240
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #224
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #136
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-NEXT:    add r0, r6, #128
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #104
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #88
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #72
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-NEXT:    add sp, sp, #256
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    add sp, sp, #4
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v32i64_v32f32:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    .pad #4
+; BE-NEON-NEXT:    sub sp, sp, #4
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    .pad #256
+; BE-NEON-NEXT:    sub sp, sp, #256
+; BE-NEON-NEXT:    add lr, sp, #208
+; BE-NEON-NEXT:    str r0, [sp, #156] @ 4-byte Spill
+; BE-NEON-NEXT:    add r0, sp, #408
+; BE-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #120
+; BE-NEON-NEXT:    vld1.64 {d10, d11}, [r0]
+; BE-NEON-NEXT:    add r0, sp, #392
+; BE-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #160
+; BE-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #176
+; BE-NEON-NEXT:    vrev64.32 d8, d10
+; BE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #136
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    vld1.64 {d12, d13}, [r0]
+; BE-NEON-NEXT:    add r0, sp, #360
+; BE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #192
+; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT:    add r0, sp, #376
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #40
+; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    str r1, [sp, #88] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vrev64.32 d9, d11
+; BE-NEON-NEXT:    add lr, sp, #240
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    str r1, [sp, #104] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.f32 s0, s18
+; BE-NEON-NEXT:    vrev64.32 d8, d13
+; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s19
+; BE-NEON-NEXT:    add lr, sp, #192
+; BE-NEON-NEXT:    str r1, [sp, #72] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d10, d16
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s20
+; BE-NEON-NEXT:    add lr, sp, #224
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s21
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    add lr, sp, #24
+; BE-NEON-NEXT:    mov r9, r1
+; BE-NEON-NEXT:    vmov.32 d15[1], r6
+; BE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #192
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d8, d17
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    vmov.32 d14[1], r7
+; BE-NEON-NEXT:    add lr, sp, #56
+; BE-NEON-NEXT:    mov r10, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    add lr, sp, #192
+; BE-NEON-NEXT:    mov r11, r1
+; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #40
+; BE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #224
+; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d8, d12
+; BE-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    vmov.32 d10[1], r5
+; BE-NEON-NEXT:    add lr, sp, #224
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vrev64.32 d8, d13
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #240
+; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #240
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #136
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #24
+; BE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d8, d16
+; BE-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    vmov.32 d12[1], r9
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #192
+; BE-NEON-NEXT:    vmov.32 d15[1], r4
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    vmov.32 d17[1], r10
+; BE-NEON-NEXT:    vmov.32 d16[1], r11
+; BE-NEON-NEXT:    vorr q9, q8, q8
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #192
+; BE-NEON-NEXT:    vmov.32 d17[1], r8
+; BE-NEON-NEXT:    vmov.32 d16[1], r5
+; BE-NEON-NEXT:    vorr q10, q8, q8
+; BE-NEON-NEXT:    vrev64.32 q8, q6
+; BE-NEON-NEXT:    vmov.32 d14[1], r6
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #240
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    vrev64.32 q8, q8
+; BE-NEON-NEXT:    vmov.32 d11[1], r7
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #224
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-NEON-NEXT:    vrev64.32 q8, q8
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #56
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #136
+; BE-NEON-NEXT:    vrev64.32 q8, q8
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #104
+; BE-NEON-NEXT:    vrev64.32 q8, q9
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #88
+; BE-NEON-NEXT:    vrev64.32 q8, q10
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #72
+; BE-NEON-NEXT:    vrev64.32 q8, q7
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #208
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #56
+; BE-NEON-NEXT:    vrev64.32 d8, d17
+; BE-NEON-NEXT:    vrev64.32 q8, q5
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #120
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT:    vmov.32 d13[1], r4
+; BE-NEON-NEXT:    vrev64.32 d8, d10
+; BE-NEON-NEXT:    vmov.32 d12[1], r1
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    vrev64.32 q6, q6
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    vmov.32 d15[1], r1
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    ldr r6, [sp, #156] @ 4-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d8, d11
+; BE-NEON-NEXT:    add r5, r6, #64
+; BE-NEON-NEXT:    vmov.32 d14[1], r1
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    vrev64.32 q8, q7
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    vmov.32 d15[1], r1
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    add lr, sp, #208
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-NEON-NEXT:    vmov.32 d14[1], r1
+; BE-NEON-NEXT:    vrev64.32 d8, d18
+; BE-NEON-NEXT:    vrev64.32 q8, q7
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    add lr, sp, #160
+; BE-NEON-NEXT:    vmov.32 d15[1], r4
+; BE-NEON-NEXT:    vmov.32 d14[1], r1
+; BE-NEON-NEXT:    vrev64.32 q8, q7
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d8, d11
+; BE-NEON-NEXT:    vst1.64 {d12, d13}, [r5:128]
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    add lr, sp, #208
+; BE-NEON-NEXT:    vmov.32 d13[1], r4
+; BE-NEON-NEXT:    vmov.32 d12[1], r1
+; BE-NEON-NEXT:    vrev64.32 q8, q6
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #176
+; BE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d8, d12
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    vmov.32 d15[1], r1
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    mov r5, r6
+; BE-NEON-NEXT:    vrev64.32 d8, d13
+; BE-NEON-NEXT:    vmov.32 d14[1], r1
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    vrev64.32 q8, q7
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    vmov.32 d15[1], r1
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    vrev64.32 d8, d10
+; BE-NEON-NEXT:    vmov.32 d14[1], r1
+; BE-NEON-NEXT:    vmov.f32 s0, s17
+; BE-NEON-NEXT:    vrev64.32 q8, q7
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.f32 s0, s16
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    bl llrintf
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    add lr, sp, #208
+; BE-NEON-NEXT:    add r0, r6, #192
+; BE-NEON-NEXT:    vmov.32 d15[1], r4
+; BE-NEON-NEXT:    vmov.32 d14[1], r1
+; BE-NEON-NEXT:    vrev64.32 q8, q7
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #56
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #192
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #240
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #224
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #136
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-NEON-NEXT:    add r0, r6, #128
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #104
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #88
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #72
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-NEON-NEXT:    add sp, sp, #256
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    add sp, sp, #4
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float> %x)
+  ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>)
+
+define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
+; LE-LABEL: llrint_v1i64_v1f64:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r11, lr}
+; LE-NEXT:    push {r11, lr}
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d0[0], r0
+; LE-NEXT:    vmov.32 d0[1], r1
+; LE-NEXT:    pop {r11, pc}
+;
+; LE-NEON-LABEL: llrint_v1i64_v1f64:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r11, lr}
+; LE-NEON-NEXT:    push {r11, lr}
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d0[0], r0
+; LE-NEON-NEXT:    vmov.32 d0[1], r1
+; LE-NEON-NEXT:    pop {r11, pc}
+;
+; BE-LABEL: llrint_v1i64_v1f64:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r11, lr}
+; BE-NEXT:    push {r11, lr}
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 d0, d16
+; BE-NEXT:    pop {r11, pc}
+;
+; BE-NEON-LABEL: llrint_v1i64_v1f64:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r11, lr}
+; BE-NEON-NEXT:    push {r11, lr}
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-NEON-NEXT:    vrev64.32 d0, d16
+; BE-NEON-NEXT:    pop {r11, pc}
+  %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x)
+  ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
+
+define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
+; LE-LABEL: llrint_v2i64_v2f64:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, lr}
+; LE-NEXT:    push {r4, lr}
+; LE-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-NEXT:    vpush {d8, d9, d10, d11}
+; LE-NEXT:    vorr q4, q0, q0
+; LE-NEXT:    vorr d0, d9, d9
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d8, d8
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    vmov.32 d11[1], r4
+; LE-NEXT:    vmov.32 d10[1], r1
+; LE-NEXT:    vorr q0, q5, q5
+; LE-NEXT:    vpop {d8, d9, d10, d11}
+; LE-NEXT:    pop {r4, pc}
+;
+; LE-NEON-LABEL: llrint_v2i64_v2f64:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, lr}
+; LE-NEON-NEXT:    push {r4, lr}
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11}
+; LE-NEON-NEXT:    vorr q4, q0, q0
+; LE-NEON-NEXT:    vorr d0, d9, d9
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d8, d8
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    vmov.32 d11[1], r4
+; LE-NEON-NEXT:    vmov.32 d10[1], r1
+; LE-NEON-NEXT:    vorr q0, q5, q5
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11}
+; LE-NEON-NEXT:    pop {r4, pc}
+;
+; BE-LABEL: llrint_v2i64_v2f64:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, lr}
+; BE-NEXT:    push {r4, lr}
+; BE-NEXT:    .vsave {d8, d9, d10, d11}
+; BE-NEXT:    vpush {d8, d9, d10, d11}
+; BE-NEXT:    vorr q4, q0, q0
+; BE-NEXT:    vorr d0, d9, d9
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d8, d8
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    vmov.32 d10[1], r1
+; BE-NEXT:    vrev64.32 q0, q5
+; BE-NEXT:    vpop {d8, d9, d10, d11}
+; BE-NEXT:    pop {r4, pc}
+;
+; BE-NEON-LABEL: llrint_v2i64_v2f64:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, lr}
+; BE-NEON-NEXT:    push {r4, lr}
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11}
+; BE-NEON-NEXT:    vorr q4, q0, q0
+; BE-NEON-NEXT:    vorr d0, d9, d9
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d8, d8
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-NEON-NEXT:    vrev64.32 q0, q5
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11}
+; BE-NEON-NEXT:    pop {r4, pc}
+  %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x)
+  ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
+
+define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
+; LE-LABEL: llrint_v4i64_v4f64:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, lr}
+; LE-NEXT:    push {r4, r5, r6, lr}
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vorr q5, q1, q1
+; LE-NEXT:    vorr q6, q0, q0
+; LE-NEXT:    vorr d0, d11, d11
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d12, d12
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d13, d13
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d10, d10
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    vmov.32 d15[1], r6
+; LE-NEXT:    vmov.32 d9[1], r4
+; LE-NEXT:    vmov.32 d14[1], r5
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vorr q0, q7, q7
+; LE-NEXT:    vorr q1, q4, q4
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    pop {r4, r5, r6, pc}
+;
+; LE-NEON-LABEL: llrint_v4i64_v4f64:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, lr}
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    vorr q5, q1, q1
+; LE-NEON-NEXT:    vorr q6, q0, q0
+; LE-NEON-NEXT:    vorr d0, d11, d11
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d12, d12
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d13, d13
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d10, d10
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    vmov.32 d15[1], r6
+; LE-NEON-NEXT:    vmov.32 d9[1], r4
+; LE-NEON-NEXT:    vmov.32 d14[1], r5
+; LE-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-NEON-NEXT:    vorr q0, q7, q7
+; LE-NEON-NEXT:    vorr q1, q4, q4
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-LABEL: llrint_v4i64_v4f64:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, lr}
+; BE-NEXT:    push {r4, r5, r6, lr}
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    vorr q4, q1, q1
+; BE-NEXT:    vorr q5, q0, q0
+; BE-NEXT:    vorr d0, d9, d9
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d10, d10
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d11, d11
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d8, d8
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    vmov.32 d15[1], r6
+; BE-NEXT:    vmov.32 d13[1], r4
+; BE-NEXT:    vmov.32 d14[1], r5
+; BE-NEXT:    vmov.32 d12[1], r1
+; BE-NEXT:    vrev64.32 q0, q7
+; BE-NEXT:    vrev64.32 q1, q6
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-NEON-LABEL: llrint_v4i64_v4f64:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, lr}
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    vorr q4, q1, q1
+; BE-NEON-NEXT:    vorr q5, q0, q0
+; BE-NEON-NEXT:    vorr d0, d9, d9
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d10, d10
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d11, d11
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d8, d8
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    vmov.32 d15[1], r6
+; BE-NEON-NEXT:    vmov.32 d13[1], r4
+; BE-NEON-NEXT:    vmov.32 d14[1], r5
+; BE-NEON-NEXT:    vmov.32 d12[1], r1
+; BE-NEON-NEXT:    vrev64.32 q0, q7
+; BE-NEON-NEXT:    vrev64.32 q1, q6
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    pop {r4, r5, r6, pc}
+  %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x)
+  ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
+
+define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
+; LE-LABEL: llrint_v8i64_v8f64:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    .pad #40
+; LE-NEXT:    sub sp, sp, #40
+; LE-NEXT:    vorr q4, q0, q0
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    vorr d0, d7, d7
+; LE-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-NEXT:    vorr q7, q2, q2
+; LE-NEXT:    vorr q6, q1, q1
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d14, d14
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    vmov.32 d17[0], r0
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d15, d15
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d12, d12
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d13, d13
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d8, d8
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d9, d9
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    vmov.32 d13[1], r6
+; LE-NEXT:    vldmia lr, {d6, d7} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d15[1], r4
+; LE-NEXT:    vmov.32 d11[1], r10
+; LE-NEXT:    vmov.32 d6[0], r0
+; LE-NEXT:    vmov.32 d12[1], r5
+; LE-NEXT:    vmov.32 d14[1], r7
+; LE-NEXT:    vorr q0, q6, q6
+; LE-NEXT:    vmov.32 d10[1], r9
+; LE-NEXT:    vorr q1, q7, q7
+; LE-NEXT:    vmov.32 d7[1], r8
+; LE-NEXT:    vorr q2, q5, q5
+; LE-NEXT:    vmov.32 d6[1], r1
+; LE-NEXT:    add sp, sp, #40
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-NEON-LABEL: llrint_v8i64_v8f64:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    .pad #40
+; LE-NEON-NEXT:    sub sp, sp, #40
+; LE-NEON-NEXT:    vorr q4, q0, q0
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    vorr d0, d7, d7
+; LE-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-NEON-NEXT:    vorr q7, q2, q2
+; LE-NEON-NEXT:    vorr q6, q1, q1
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d14, d14
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d15, d15
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d12, d12
+; LE-NEON-NEXT:    mov r10, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d13, d13
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d8, d8
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d9, d9
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    vmov.32 d13[1], r6
+; LE-NEON-NEXT:    vldmia lr, {d6, d7} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d15[1], r4
+; LE-NEON-NEXT:    vmov.32 d11[1], r10
+; LE-NEON-NEXT:    vmov.32 d6[0], r0
+; LE-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-NEON-NEXT:    vmov.32 d14[1], r7
+; LE-NEON-NEXT:    vorr q0, q6, q6
+; LE-NEON-NEXT:    vmov.32 d10[1], r9
+; LE-NEON-NEXT:    vorr q1, q7, q7
+; LE-NEON-NEXT:    vmov.32 d7[1], r8
+; LE-NEON-NEXT:    vorr q2, q5, q5
+; LE-NEON-NEXT:    vmov.32 d6[1], r1
+; LE-NEON-NEXT:    add sp, sp, #40
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-LABEL: llrint_v8i64_v8f64:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    .pad #40
+; BE-NEXT:    sub sp, sp, #40
+; BE-NEXT:    vorr q4, q0, q0
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    vorr d0, d7, d7
+; BE-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-NEXT:    vorr q7, q2, q2
+; BE-NEXT:    vorr q6, q1, q1
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d14, d14
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vmov.32 d17[0], r0
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d15, d15
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d12, d12
+; BE-NEXT:    mov r10, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d13, d13
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d8, d8
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d9, d9
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vmov.32 d13[1], r6
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vmov.32 d15[1], r4
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov.32 d11[1], r10
+; BE-NEXT:    vmov.32 d17[1], r8
+; BE-NEXT:    vmov.32 d12[1], r5
+; BE-NEXT:    vmov.32 d14[1], r7
+; BE-NEXT:    vmov.32 d10[1], r9
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 q0, q6
+; BE-NEXT:    vrev64.32 q1, q7
+; BE-NEXT:    vrev64.32 q2, q5
+; BE-NEXT:    vrev64.32 q3, q8
+; BE-NEXT:    add sp, sp, #40
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-NEON-LABEL: llrint_v8i64_v8f64:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    .pad #40
+; BE-NEON-NEXT:    sub sp, sp, #40
+; BE-NEON-NEXT:    vorr q4, q0, q0
+; BE-NEON-NEXT:    add lr, sp, #24
+; BE-NEON-NEXT:    vorr d0, d7, d7
+; BE-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-NEON-NEXT:    vorr q7, q2, q2
+; BE-NEON-NEXT:    vorr q6, q1, q1
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d14, d14
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    vmov.32 d17[0], r0
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d15, d15
+; BE-NEON-NEXT:    mov r9, r1
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d12, d12
+; BE-NEON-NEXT:    mov r10, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d13, d13
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d8, d8
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d9, d9
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    add lr, sp, #24
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    vmov.32 d13[1], r6
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    vmov.32 d15[1], r4
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov.32 d11[1], r10
+; BE-NEON-NEXT:    vmov.32 d17[1], r8
+; BE-NEON-NEXT:    vmov.32 d12[1], r5
+; BE-NEON-NEXT:    vmov.32 d14[1], r7
+; BE-NEON-NEXT:    vmov.32 d10[1], r9
+; BE-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-NEON-NEXT:    vrev64.32 q0, q6
+; BE-NEON-NEXT:    vrev64.32 q1, q7
+; BE-NEON-NEXT:    vrev64.32 q2, q5
+; BE-NEON-NEXT:    vrev64.32 q3, q8
+; BE-NEON-NEXT:    add sp, sp, #40
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+  %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x)
+  ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
+
+define <16 x i64> @llrint_v16f64(<16 x double> %x) {
+; LE-LABEL: llrint_v16f64:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    .pad #4
+; LE-NEXT:    sub sp, sp, #4
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    .pad #176
+; LE-NEXT:    sub sp, sp, #176
+; LE-NEXT:    add lr, sp, #40
+; LE-NEXT:    str r0, [sp, #140] @ 4-byte Spill
+; LE-NEXT:    add r0, sp, #312
+; LE-NEXT:    vorr q6, q2, q2
+; LE-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #96
+; LE-NEXT:    vorr q7, q1, q1
+; LE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    vorr d0, d1, d1
+; LE-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEXT:    add r0, sp, #280
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #80
+; LE-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEXT:    add r0, sp, #296
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #120
+; LE-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEXT:    add r0, sp, #328
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d14, d14
+; LE-NEXT:    str r1, [sp, #116] @ 4-byte Spill
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d15, d15
+; LE-NEXT:    str r1, [sp, #76] @ 4-byte Spill
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d12, d12
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    str r1, [sp, #72] @ 4-byte Spill
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d13, d13
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #40
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d8, d8
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d9, d9
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #96
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    add lr, sp, #40
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov.32 d13[1], r5
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d9, d9
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d8, d8
+; LE-NEXT:    vmov.32 d12[1], r7
+; LE-NEXT:    add lr, sp, #96
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    mov r11, r1
+; LE-NEXT:    vmov.32 d15[1], r4
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d17, d17
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    vmov.32 d14[1], r6
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d17[0], r0
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #80
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d11, d11
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    vorr d0, d10, d10
+; LE-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d9[1], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d8[1], r0
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #120
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d11, d11
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    add lr, sp, #40
+; LE-NEXT:    vorr d0, d10, d10
+; LE-NEXT:    ldr r0, [sp, #116] @ 4-byte Reload
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d9[1], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-NEXT:    vmov.32 d8[1], r10
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    vmov.32 d15[1], r6
+; LE-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    vmov.32 d20[0], r0
+; LE-NEXT:    vmov.32 d21[1], r8
+; LE-NEXT:    vmov.32 d20[1], r1
+; LE-NEXT:    ldr r1, [sp, #140] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d13[1], r5
+; LE-NEXT:    mov r0, r1
+; LE-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    vmov.32 d14[1], r4
+; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #96
+; LE-NEXT:    vmov.32 d12[1], r7
+; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d17[1], r9
+; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; LE-NEXT:    add r0, r1, #64
+; LE-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-NEXT:    vmov.32 d16[1], r11
+; LE-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT:    add sp, sp, #176
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    add sp, sp, #4
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v16f64:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    .pad #4
+; LE-NEON-NEXT:    sub sp, sp, #4
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    .pad #176
+; LE-NEON-NEXT:    sub sp, sp, #176
+; LE-NEON-NEXT:    add lr, sp, #40
+; LE-NEON-NEXT:    str r0, [sp, #140] @ 4-byte Spill
+; LE-NEON-NEXT:    add r0, sp, #312
+; LE-NEON-NEXT:    vorr q6, q2, q2
+; LE-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #96
+; LE-NEON-NEXT:    vorr q7, q1, q1
+; LE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #144
+; LE-NEON-NEXT:    vorr d0, d1, d1
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT:    add r0, sp, #280
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #80
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT:    add r0, sp, #296
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #120
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT:    add r0, sp, #328
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d14, d14
+; LE-NEON-NEXT:    str r1, [sp, #116] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d15, d15
+; LE-NEON-NEXT:    str r1, [sp, #76] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d12, d12
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    str r1, [sp, #72] @ 4-byte Spill
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d13, d13
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #40
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d8, d8
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d9, d9
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #96
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    add lr, sp, #40
+; LE-NEON-NEXT:    mov r10, r1
+; LE-NEON-NEXT:    vmov.32 d13[1], r5
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d9, d9
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d8, d8
+; LE-NEON-NEXT:    vmov.32 d12[1], r7
+; LE-NEON-NEXT:    add lr, sp, #96
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    mov r11, r1
+; LE-NEON-NEXT:    vmov.32 d15[1], r4
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #144
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d17, d17
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    vmov.32 d14[1], r6
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #80
+; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d11, d11
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    vorr d0, d10, d10
+; LE-NEON-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #120
+; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d11, d11
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    add lr, sp, #40
+; LE-NEON-NEXT:    vorr d0, d10, d10
+; LE-NEON-NEXT:    ldr r0, [sp, #116] @ 4-byte Reload
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #144
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-NEON-NEXT:    vmov.32 d8[1], r10
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    vmov.32 d15[1], r6
+; LE-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    vmov.32 d20[0], r0
+; LE-NEON-NEXT:    vmov.32 d21[1], r8
+; LE-NEON-NEXT:    vmov.32 d20[1], r1
+; LE-NEON-NEXT:    ldr r1, [sp, #140] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov.32 d13[1], r5
+; LE-NEON-NEXT:    mov r0, r1
+; LE-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    vmov.32 d14[1], r4
+; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #96
+; LE-NEON-NEXT:    vmov.32 d12[1], r7
+; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d17[1], r9
+; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; LE-NEON-NEXT:    add r0, r1, #64
+; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-NEON-NEXT:    vmov.32 d16[1], r11
+; LE-NEON-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEON-NEXT:    add sp, sp, #176
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    add sp, sp, #4
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v16f64:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    .pad #4
+; BE-NEXT:    sub sp, sp, #4
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    .pad #168
+; BE-NEXT:    sub sp, sp, #168
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    str r0, [sp, #132] @ 4-byte Spill
+; BE-NEXT:    add r0, sp, #304
+; BE-NEXT:    vorr q4, q3, q3
+; BE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #48
+; BE-NEXT:    vorr d0, d1, d1
+; BE-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEXT:    add r0, sp, #320
+; BE-NEXT:    vorr q6, q2, q2
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #88
+; BE-NEXT:    vorr q7, q1, q1
+; BE-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEXT:    add r0, sp, #272
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #112
+; BE-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEXT:    add r0, sp, #288
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d14, d14
+; BE-NEXT:    add lr, sp, #136
+; BE-NEXT:    vmov.32 d17[0], r0
+; BE-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d15, d15
+; BE-NEXT:    str r1, [sp, #84] @ 4-byte Spill
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d12, d12
+; BE-NEXT:    add lr, sp, #152
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d13, d13
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d8, d8
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d9, d9
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    add lr, sp, #136
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vmov.32 d13[1], r5
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEXT:    vorr d0, d9, d9
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d8, d8
+; BE-NEXT:    vmov.32 d12[1], r7
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    mov r10, r1
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    mov r11, r1
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #48
+; BE-NEXT:    vorr q6, q5, q5
+; BE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEXT:    vorr d0, d9, d9
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d8, d8
+; BE-NEXT:    vmov.32 d12[1], r6
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add lr, sp, #48
+; BE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #152
+; BE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #88
+; BE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEXT:    vorr d0, d13, d13
+; BE-NEXT:    vmov.32 d9[1], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; BE-NEXT:    vorr d0, d12, d12
+; BE-NEXT:    add lr, sp, #152
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d8[1], r0
+; BE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-NEXT:    bl llrint
+; BE-NEXT:    add lr, sp, #136
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #112
+; BE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEXT:    vorr d0, d9, d9
+; BE-NEXT:    vmov.32 d11[1], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d8, d8
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    vmov.32 d10[1], r9
+; BE-NEXT:    bl llrint
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #48
+; BE-NEXT:    vmov.32 d17[1], r10
+; BE-NEXT:    vmov.32 d16[1], r11
+; BE-NEXT:    vorr q12, q8, q8
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #152
+; BE-NEXT:    vmov.32 d17[1], r8
+; BE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    vmov.32 d13[1], r7
+; BE-NEXT:    vmov.32 d16[1], r6
+; BE-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    vorr q13, q8, q8
+; BE-NEXT:    vmov.32 d12[1], r1
+; BE-NEXT:    ldr r1, [sp, #132] @ 4-byte Reload
+; BE-NEXT:    vrev64.32 q8, q5
+; BE-NEXT:    mov r0, r1
+; BE-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 q9, q9
+; BE-NEXT:    vrev64.32 q10, q10
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; BE-NEXT:    vrev64.32 q11, q11
+; BE-NEXT:    vmov.32 d15[1], r4
+; BE-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; BE-NEXT:    vrev64.32 q15, q6
+; BE-NEXT:    vmov.32 d14[1], r5
+; BE-NEXT:    vrev64.32 q12, q12
+; BE-NEXT:    vst1.64 {d22, d23}, [r0:128]
+; BE-NEXT:    add r0, r1, #64
+; BE-NEXT:    vrev64.32 q13, q13
+; BE-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-NEXT:    vst1.64 {d24, d25}, [r0:128]!
+; BE-NEXT:    vrev64.32 q14, q7
+; BE-NEXT:    vst1.64 {d26, d27}, [r0:128]!
+; BE-NEXT:    vst1.64 {d28, d29}, [r0:128]
+; BE-NEXT:    add sp, sp, #168
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    add sp, sp, #4
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v16f64:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    .pad #4
+; BE-NEON-NEXT:    sub sp, sp, #4
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    .pad #168
+; BE-NEON-NEXT:    sub sp, sp, #168
+; BE-NEON-NEXT:    add lr, sp, #64
+; BE-NEON-NEXT:    str r0, [sp, #132] @ 4-byte Spill
+; BE-NEON-NEXT:    add r0, sp, #304
+; BE-NEON-NEXT:    vorr q4, q3, q3
+; BE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #48
+; BE-NEON-NEXT:    vorr d0, d1, d1
+; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT:    add r0, sp, #320
+; BE-NEON-NEXT:    vorr q6, q2, q2
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #88
+; BE-NEON-NEXT:    vorr q7, q1, q1
+; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT:    add r0, sp, #272
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #112
+; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT:    add r0, sp, #288
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #24
+; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d14, d14
+; BE-NEON-NEXT:    add lr, sp, #136
+; BE-NEON-NEXT:    vmov.32 d17[0], r0
+; BE-NEON-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d15, d15
+; BE-NEON-NEXT:    str r1, [sp, #84] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d12, d12
+; BE-NEON-NEXT:    add lr, sp, #152
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d13, d13
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d8, d8
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d9, d9
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    add lr, sp, #64
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    add lr, sp, #136
+; BE-NEON-NEXT:    mov r9, r1
+; BE-NEON-NEXT:    vmov.32 d13[1], r5
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #24
+; BE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEON-NEXT:    vorr d0, d9, d9
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d8, d8
+; BE-NEON-NEXT:    vmov.32 d12[1], r7
+; BE-NEON-NEXT:    add lr, sp, #64
+; BE-NEON-NEXT:    mov r10, r1
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    mov r11, r1
+; BE-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #48
+; BE-NEON-NEXT:    vorr q6, q5, q5
+; BE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEON-NEXT:    vorr d0, d9, d9
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d8, d8
+; BE-NEON-NEXT:    vmov.32 d12[1], r6
+; BE-NEON-NEXT:    add lr, sp, #24
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    add lr, sp, #48
+; BE-NEON-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #152
+; BE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #88
+; BE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEON-NEXT:    vorr d0, d13, d13
+; BE-NEON-NEXT:    vmov.32 d9[1], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; BE-NEON-NEXT:    vorr d0, d12, d12
+; BE-NEON-NEXT:    add lr, sp, #152
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    add lr, sp, #136
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #112
+; BE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEON-NEXT:    vorr d0, d9, d9
+; BE-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d8, d8
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    vmov.32 d10[1], r9
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #48
+; BE-NEON-NEXT:    vmov.32 d17[1], r10
+; BE-NEON-NEXT:    vmov.32 d16[1], r11
+; BE-NEON-NEXT:    vorr q12, q8, q8
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #152
+; BE-NEON-NEXT:    vmov.32 d17[1], r8
+; BE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #24
+; BE-NEON-NEXT:    vmov.32 d13[1], r7
+; BE-NEON-NEXT:    vmov.32 d16[1], r6
+; BE-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #64
+; BE-NEON-NEXT:    vorr q13, q8, q8
+; BE-NEON-NEXT:    vmov.32 d12[1], r1
+; BE-NEON-NEXT:    ldr r1, [sp, #132] @ 4-byte Reload
+; BE-NEON-NEXT:    vrev64.32 q8, q5
+; BE-NEON-NEXT:    mov r0, r1
+; BE-NEON-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-NEON-NEXT:    vrev64.32 q9, q9
+; BE-NEON-NEXT:    vrev64.32 q10, q10
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; BE-NEON-NEXT:    vrev64.32 q11, q11
+; BE-NEON-NEXT:    vmov.32 d15[1], r4
+; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; BE-NEON-NEXT:    vrev64.32 q15, q6
+; BE-NEON-NEXT:    vmov.32 d14[1], r5
+; BE-NEON-NEXT:    vrev64.32 q12, q12
+; BE-NEON-NEXT:    vst1.64 {d22, d23}, [r0:128]
+; BE-NEON-NEXT:    add r0, r1, #64
+; BE-NEON-NEXT:    vrev64.32 q13, q13
+; BE-NEON-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-NEON-NEXT:    vst1.64 {d24, d25}, [r0:128]!
+; BE-NEON-NEXT:    vrev64.32 q14, q7
+; BE-NEON-NEXT:    vst1.64 {d26, d27}, [r0:128]!
+; BE-NEON-NEXT:    vst1.64 {d28, d29}, [r0:128]
+; BE-NEON-NEXT:    add sp, sp, #168
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    add sp, sp, #4
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x)
+  ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>)
+
+define <32 x i64> @llrint_v32f64(<32 x double> %x) {
+; LE-LABEL: llrint_v32f64:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    .pad #4
+; LE-NEXT:    sub sp, sp, #4
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    .pad #208
+; LE-NEXT:    sub sp, sp, #208
+; LE-NEXT:    add lr, sp, #72
+; LE-NEXT:    str r0, [sp, #156] @ 4-byte Spill
+; LE-NEXT:    add r0, sp, #456
+; LE-NEXT:    vorr q4, q0, q0
+; LE-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    vorr d0, d7, d7
+; LE-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #104
+; LE-NEXT:    vorr q5, q2, q2
+; LE-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEXT:    add r0, sp, #344
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #192
+; LE-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEXT:    add r0, sp, #376
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEXT:    add r0, sp, #360
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #136
+; LE-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEXT:    add r0, sp, #440
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d10, d10
+; LE-NEXT:    str r1, [sp, #120] @ 4-byte Spill
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d11, d11
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d8, d8
+; LE-NEXT:    add lr, sp, #88
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    mov r11, r1
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d9, d9
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #40
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d10, d10
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d11, d11
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #72
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d9[1], r7
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d17, d17
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d8[1], r4
+; LE-NEXT:    add lr, sp, #72
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #104
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d9, d9
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #40
+; LE-NEXT:    vorr d0, d8, d8
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d11[1], r6
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-NEXT:    vmov.32 d10[1], r9
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #88
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    ldr r0, [sp, #120] @ 4-byte Reload
+; LE-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    vmov.32 d19[1], r0
+; LE-NEXT:    add r0, sp, #408
+; LE-NEXT:    ldr r2, [sp, #156] @ 4-byte Reload
+; LE-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEXT:    vmov.32 d13[1], r7
+; LE-NEXT:    mov r0, r2
+; LE-NEXT:    vmov.32 d12[1], r1
+; LE-NEXT:    add r1, sp, #488
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #40
+; LE-NEXT:    vld1.64 {d16, d17}, [r1]
+; LE-NEXT:    add r1, sp, #472
+; LE-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #72
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    vmov.32 d21[1], r11
+; LE-NEXT:    vmov.32 d20[1], r10
+; LE-NEXT:    add r10, r2, #192
+; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT:    vld1.64 {d16, d17}, [r1]
+; LE-NEXT:    add r1, sp, #392
+; LE-NEXT:    vmov.32 d18[1], r5
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #72
+; LE-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; LE-NEXT:    vld1.64 {d16, d17}, [r1]
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #104
+; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; LE-NEXT:    add r0, sp, #312
+; LE-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEXT:    add r0, sp, #328
+; LE-NEXT:    vmov.32 d15[1], r8
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #120
+; LE-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEXT:    add r0, sp, #424
+; LE-NEXT:    vmov.32 d14[1], r4
+; LE-NEXT:    vst1.64 {d12, d13}, [r10:128]!
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEXT:    vst1.64 {d14, d15}, [r10:128]!
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #192
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d17, d17
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #136
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d10, d10
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d11, d11
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d10, d10
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d11, d11
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #192
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    add lr, sp, #192
+; LE-NEXT:    mov r11, r1
+; LE-NEXT:    vmov.32 d15[1], r4
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d9, d9
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d8, d8
+; LE-NEXT:    vmov.32 d14[1], r6
+; LE-NEXT:    add lr, sp, #136
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d13[1], r5
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d9, d9
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d8, d8
+; LE-NEXT:    vmov.32 d12[1], r8
+; LE-NEXT:    add lr, sp, #88
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #192
+; LE-NEXT:    str r1, [sp, #24] @ 4-byte Spill
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #40
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d11, d11
+; LE-NEXT:    vmov.32 d9[1], r9
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d10, d10
+; LE-NEXT:    vmov.32 d8[1], r11
+; LE-NEXT:    add lr, sp, #192
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    str r1, [sp, #40] @ 4-byte Spill
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d9, d9
+; LE-NEXT:    vmov.32 d11[1], r4
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d8, d8
+; LE-NEXT:    vmov.32 d10[1], r7
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    mov r11, r1
+; LE-NEXT:    vmov.32 d15[1], r5
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #72
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d9, d9
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; LE-NEXT:    vorr d0, d8, d8
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d14[1], r0
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    add lr, sp, #72
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #104
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d9, d9
+; LE-NEXT:    vmov.32 d13[1], r6
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; LE-NEXT:    vorr d0, d8, d8
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d12[1], r0
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #120
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d9, d9
+; LE-NEXT:    vmov.32 d13[1], r8
+; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d8, d8
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    vmov.32 d12[1], r11
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #72
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    vmov.32 d17[1], r9
+; LE-NEXT:    vmov.32 d16[1], r7
+; LE-NEXT:    vst1.64 {d12, d13}, [r10:128]!
+; LE-NEXT:    vorr q9, q8, q8
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #136
+; LE-NEXT:    vmov.32 d15[1], r5
+; LE-NEXT:    vst1.64 {d16, d17}, [r10:128]
+; LE-NEXT:    vmov.32 d14[1], r1
+; LE-NEXT:    ldr r1, [sp, #156] @ 4-byte Reload
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add r0, r1, #128
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    vmov.32 d11[1], r6
+; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    vmov.32 d10[1], r4
+; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #192
+; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT:    add r0, r1, #64
+; LE-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #88
+; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT:    add sp, sp, #208
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    add sp, sp, #4
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v32f64:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    .pad #4
+; LE-NEON-NEXT:    sub sp, sp, #4
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    .pad #208
+; LE-NEON-NEXT:    sub sp, sp, #208
+; LE-NEON-NEXT:    add lr, sp, #72
+; LE-NEON-NEXT:    str r0, [sp, #156] @ 4-byte Spill
+; LE-NEON-NEXT:    add r0, sp, #456
+; LE-NEON-NEXT:    vorr q4, q0, q0
+; LE-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    vorr d0, d7, d7
+; LE-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #104
+; LE-NEON-NEXT:    vorr q5, q2, q2
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT:    add r0, sp, #344
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #192
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT:    add r0, sp, #376
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT:    add r0, sp, #360
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #136
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT:    add r0, sp, #440
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d10, d10
+; LE-NEON-NEXT:    str r1, [sp, #120] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d11, d11
+; LE-NEON-NEXT:    mov r10, r1
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d8, d8
+; LE-NEON-NEXT:    add lr, sp, #88
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    mov r11, r1
+; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d9, d9
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #40
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d10, d10
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d11, d11
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #72
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d9[1], r7
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d17, d17
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d8[1], r4
+; LE-NEON-NEXT:    add lr, sp, #72
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #104
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d9, d9
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #40
+; LE-NEON-NEXT:    vorr d0, d8, d8
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d11[1], r6
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-NEON-NEXT:    vmov.32 d10[1], r9
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #88
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #120] @ 4-byte Reload
+; LE-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    vmov.32 d19[1], r0
+; LE-NEON-NEXT:    add r0, sp, #408
+; LE-NEON-NEXT:    ldr r2, [sp, #156] @ 4-byte Reload
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT:    vmov.32 d13[1], r7
+; LE-NEON-NEXT:    mov r0, r2
+; LE-NEON-NEXT:    vmov.32 d12[1], r1
+; LE-NEON-NEXT:    add r1, sp, #488
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #40
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r1]
+; LE-NEON-NEXT:    add r1, sp, #472
+; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #72
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    vmov.32 d21[1], r11
+; LE-NEON-NEXT:    vmov.32 d20[1], r10
+; LE-NEON-NEXT:    add r10, r2, #192
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r1]
+; LE-NEON-NEXT:    add r1, sp, #392
+; LE-NEON-NEXT:    vmov.32 d18[1], r5
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #72
+; LE-NEON-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r1]
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #104
+; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; LE-NEON-NEXT:    add r0, sp, #312
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT:    add r0, sp, #328
+; LE-NEON-NEXT:    vmov.32 d15[1], r8
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #120
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT:    add r0, sp, #424
+; LE-NEON-NEXT:    vmov.32 d14[1], r4
+; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r10:128]!
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r10:128]!
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #192
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d17, d17
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #136
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d10, d10
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d11, d11
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d10, d10
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d11, d11
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #192
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    add lr, sp, #192
+; LE-NEON-NEXT:    mov r11, r1
+; LE-NEON-NEXT:    vmov.32 d15[1], r4
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d9, d9
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d8, d8
+; LE-NEON-NEXT:    vmov.32 d14[1], r6
+; LE-NEON-NEXT:    add lr, sp, #136
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d13[1], r5
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d9, d9
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d8, d8
+; LE-NEON-NEXT:    vmov.32 d12[1], r8
+; LE-NEON-NEXT:    add lr, sp, #88
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #192
+; LE-NEON-NEXT:    str r1, [sp, #24] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #40
+; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d11, d11
+; LE-NEON-NEXT:    vmov.32 d9[1], r9
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d10, d10
+; LE-NEON-NEXT:    vmov.32 d8[1], r11
+; LE-NEON-NEXT:    add lr, sp, #192
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    str r1, [sp, #40] @ 4-byte Spill
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d9, d9
+; LE-NEON-NEXT:    vmov.32 d11[1], r4
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d8, d8
+; LE-NEON-NEXT:    vmov.32 d10[1], r7
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    mov r11, r1
+; LE-NEON-NEXT:    vmov.32 d15[1], r5
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #72
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d9, d9
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; LE-NEON-NEXT:    vorr d0, d8, d8
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d14[1], r0
+; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    add lr, sp, #72
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #104
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d9, d9
+; LE-NEON-NEXT:    vmov.32 d13[1], r6
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; LE-NEON-NEXT:    vorr d0, d8, d8
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d12[1], r0
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #120
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    vorr d0, d9, d9
+; LE-NEON-NEXT:    vmov.32 d13[1], r8
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    vorr d0, d8, d8
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    vmov.32 d12[1], r11
+; LE-NEON-NEXT:    bl llrint
+; LE-NEON-NEXT:    add lr, sp, #72
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    vmov.32 d17[1], r9
+; LE-NEON-NEXT:    vmov.32 d16[1], r7
+; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r10:128]!
+; LE-NEON-NEXT:    vorr q9, q8, q8
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #136
+; LE-NEON-NEXT:    vmov.32 d15[1], r5
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]
+; LE-NEON-NEXT:    vmov.32 d14[1], r1
+; LE-NEON-NEXT:    ldr r1, [sp, #156] @ 4-byte Reload
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add r0, r1, #128
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    vmov.32 d11[1], r6
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    vmov.32 d10[1], r4
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #192
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEON-NEXT:    add r0, r1, #64
+; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #88
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEON-NEXT:    add sp, sp, #208
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    add sp, sp, #4
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v32f64:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    .pad #4
+; BE-NEXT:    sub sp, sp, #4
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    .pad #232
+; BE-NEXT:    sub sp, sp, #232
+; BE-NEXT:    add lr, sp, #184
+; BE-NEXT:    str r0, [sp, #148] @ 4-byte Spill
+; BE-NEXT:    add r0, sp, #416
+; BE-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #168
+; BE-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #152
+; BE-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #128
+; BE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #200
+; BE-NEXT:    vld1.64 {d18, d19}, [r0]
+; BE-NEXT:    add r0, sp, #448
+; BE-NEXT:    vorr d0, d19, d19
+; BE-NEXT:    vld1.64 {d14, d15}, [r0]
+; BE-NEXT:    add r0, sp, #336
+; BE-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEXT:    add r0, sp, #400
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEXT:    add r0, sp, #352
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEXT:    add r0, sp, #368
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #48
+; BE-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEXT:    add r0, sp, #384
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEXT:    add r0, sp, #512
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #112
+; BE-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEXT:    add r0, sp, #432
+; BE-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d8, d8
+; BE-NEXT:    str r1, [sp, #80] @ 4-byte Spill
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d9, d9
+; BE-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d14, d14
+; BE-NEXT:    add lr, sp, #216
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d15, d15
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT:    vorr d0, d10, d10
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d11, d11
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    add lr, sp, #200
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    add lr, sp, #200
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d15[1], r7
+; BE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT:    vorr d0, d11, d11
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d10, d10
+; BE-NEXT:    vmov.32 d14[1], r6
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    mov r10, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT:    bl llrint
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    mov r11, r1
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-NEXT:    vorr d0, d15, d15
+; BE-NEXT:    vmov.32 d9[1], r4
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d14, d14
+; BE-NEXT:    vmov.32 d8[1], r8
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #216
+; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #48
+; BE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEXT:    vorr d0, d9, d9
+; BE-NEXT:    vmov.32 d11[1], r9
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; BE-NEXT:    vorr d0, d8, d8
+; BE-NEXT:    add lr, sp, #216
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vmov.32 d10[1], r0
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    add lr, sp, #48
+; BE-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #200
+; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEXT:    vorr d0, d9, d9
+; BE-NEXT:    vmov.32 d11[1], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d8, d8
+; BE-NEXT:    vmov.32 d10[1], r5
+; BE-NEXT:    add lr, sp, #200
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    bl llrint
+; BE-NEXT:    add lr, sp, #112
+; BE-NEXT:    vorr q4, q6, q6
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEXT:    vorr d0, d13, d13
+; BE-NEXT:    vmov.32 d9[1], r10
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d12, d12
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    vmov.32 d8[1], r11
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #48
+; BE-NEXT:    vmov.32 d17[1], r0
+; BE-NEXT:    vmov.32 d16[1], r8
+; BE-NEXT:    vorr q9, q8, q8
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #112
+; BE-NEXT:    vmov.32 d17[1], r9
+; BE-NEXT:    vmov.32 d16[1], r6
+; BE-NEXT:    vorr q10, q8, q8
+; BE-NEXT:    vrev64.32 q8, q4
+; BE-NEXT:    vmov.32 d15[1], r7
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #200
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vmov.32 d11[1], r5
+; BE-NEXT:    vrev64.32 q8, q8
+; BE-NEXT:    vmov.32 d14[1], r4
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #216
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vmov.32 d10[1], r1
+; BE-NEXT:    vrev64.32 q8, q8
+; BE-NEXT:    vrev64.32 q6, q7
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    vrev64.32 q7, q5
+; BE-NEXT:    vrev64.32 q8, q8
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #80
+; BE-NEXT:    vrev64.32 q8, q8
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    vrev64.32 q8, q9
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #48
+; BE-NEXT:    vrev64.32 q8, q10
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #128
+; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT:    vorr d0, d11, d11
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d10, d10
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    ldr r6, [sp, #148] @ 4-byte Reload
+; BE-NEXT:    add lr, sp, #152
+; BE-NEXT:    vmov.32 d9[1], r4
+; BE-NEXT:    mov r5, r6
+; BE-NEXT:    vmov.32 d8[1], r1
+; BE-NEXT:    vrev64.32 q8, q4
+; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT:    vorr d0, d11, d11
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d10, d10
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    add lr, sp, #168
+; BE-NEXT:    vmov.32 d9[1], r4
+; BE-NEXT:    vmov.32 d8[1], r1
+; BE-NEXT:    vrev64.32 q8, q4
+; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT:    vorr d0, d11, d11
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d10, d10
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    add lr, sp, #184
+; BE-NEXT:    vmov.32 d9[1], r4
+; BE-NEXT:    vmov.32 d8[1], r1
+; BE-NEXT:    vrev64.32 q8, q4
+; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT:    vorr d0, d11, d11
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d10, d10
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    add r0, sp, #464
+; BE-NEXT:    vmov.32 d9[1], r4
+; BE-NEXT:    vmov.32 d8[1], r1
+; BE-NEXT:    vrev64.32 q8, q4
+; BE-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-NEXT:    vorr d0, d9, d9
+; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d8, d8
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add r0, sp, #480
+; BE-NEXT:    add r5, r6, #192
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    vmov.32 d10[1], r1
+; BE-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-NEXT:    vorr d0, d9, d9
+; BE-NEXT:    vrev64.32 q8, q5
+; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d8, d8
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add r0, sp, #496
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    vmov.32 d10[1], r1
+; BE-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-NEXT:    vorr d0, d9, d9
+; BE-NEXT:    vrev64.32 q8, q5
+; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vorr d0, d8, d8
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrint
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add lr, sp, #112
+; BE-NEXT:    add r0, r6, #128
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    vmov.32 d10[1], r1
+; BE-NEXT:    vrev64.32 q8, q5
+; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    vst1.64 {d14, d15}, [r5:128]
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #200
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #216
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #80
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-NEXT:    add r0, r6, #64
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #48
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    vst1.64 {d12, d13}, [r0:128]
+; BE-NEXT:    add sp, sp, #232
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    add sp, sp, #4
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v32f64:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    .pad #4
+; BE-NEON-NEXT:    sub sp, sp, #4
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    .pad #232
+; BE-NEON-NEXT:    sub sp, sp, #232
+; BE-NEON-NEXT:    add lr, sp, #184
+; BE-NEON-NEXT:    str r0, [sp, #148] @ 4-byte Spill
+; BE-NEON-NEXT:    add r0, sp, #416
+; BE-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #168
+; BE-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #152
+; BE-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #128
+; BE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #200
+; BE-NEON-NEXT:    vld1.64 {d18, d19}, [r0]
+; BE-NEON-NEXT:    add r0, sp, #448
+; BE-NEON-NEXT:    vorr d0, d19, d19
+; BE-NEON-NEXT:    vld1.64 {d14, d15}, [r0]
+; BE-NEON-NEXT:    add r0, sp, #336
+; BE-NEON-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #64
+; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT:    add r0, sp, #400
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT:    add r0, sp, #352
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #24
+; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT:    add r0, sp, #368
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #48
+; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT:    add r0, sp, #384
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #96
+; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT:    add r0, sp, #512
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #112
+; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-NEON-NEXT:    add r0, sp, #432
+; BE-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d8, d8
+; BE-NEON-NEXT:    str r1, [sp, #80] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d9, d9
+; BE-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d14, d14
+; BE-NEON-NEXT:    add lr, sp, #216
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    mov r9, r1
+; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d15, d15
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    add lr, sp, #64
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT:    vorr d0, d10, d10
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d11, d11
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    add lr, sp, #200
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    add lr, sp, #200
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d15[1], r7
+; BE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT:    vorr d0, d11, d11
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d10, d10
+; BE-NEON-NEXT:    vmov.32 d14[1], r6
+; BE-NEON-NEXT:    add lr, sp, #64
+; BE-NEON-NEXT:    mov r10, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    add lr, sp, #24
+; BE-NEON-NEXT:    mov r11, r1
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-NEON-NEXT:    vorr d0, d15, d15
+; BE-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d14, d14
+; BE-NEON-NEXT:    vmov.32 d8[1], r8
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    add lr, sp, #24
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #216
+; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #48
+; BE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEON-NEXT:    vorr d0, d9, d9
+; BE-NEON-NEXT:    vmov.32 d11[1], r9
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; BE-NEON-NEXT:    vorr d0, d8, d8
+; BE-NEON-NEXT:    add lr, sp, #216
+; BE-NEON-NEXT:    mov r9, r1
+; BE-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    add lr, sp, #48
+; BE-NEON-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #200
+; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #96
+; BE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEON-NEXT:    vorr d0, d9, d9
+; BE-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d8, d8
+; BE-NEON-NEXT:    vmov.32 d10[1], r5
+; BE-NEON-NEXT:    add lr, sp, #200
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    add lr, sp, #112
+; BE-NEON-NEXT:    vorr q4, q6, q6
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEON-NEXT:    vorr d0, d13, d13
+; BE-NEON-NEXT:    vmov.32 d9[1], r10
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d12, d12
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    vmov.32 d8[1], r11
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    add lr, sp, #24
+; BE-NEON-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #48
+; BE-NEON-NEXT:    vmov.32 d17[1], r0
+; BE-NEON-NEXT:    vmov.32 d16[1], r8
+; BE-NEON-NEXT:    vorr q9, q8, q8
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #112
+; BE-NEON-NEXT:    vmov.32 d17[1], r9
+; BE-NEON-NEXT:    vmov.32 d16[1], r6
+; BE-NEON-NEXT:    vorr q10, q8, q8
+; BE-NEON-NEXT:    vrev64.32 q8, q4
+; BE-NEON-NEXT:    vmov.32 d15[1], r7
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #200
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    vmov.32 d11[1], r5
+; BE-NEON-NEXT:    vrev64.32 q8, q8
+; BE-NEON-NEXT:    vmov.32 d14[1], r4
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #216
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-NEON-NEXT:    vrev64.32 q8, q8
+; BE-NEON-NEXT:    vrev64.32 q6, q7
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #8
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #96
+; BE-NEON-NEXT:    vrev64.32 q7, q5
+; BE-NEON-NEXT:    vrev64.32 q8, q8
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #64
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #80
+; BE-NEON-NEXT:    vrev64.32 q8, q8
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #64
+; BE-NEON-NEXT:    vrev64.32 q8, q9
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #48
+; BE-NEON-NEXT:    vrev64.32 q8, q10
+; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEON-NEXT:    add lr, sp, #128
+; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT:    vorr d0, d11, d11
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d10, d10
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    ldr r6, [sp, #148] @ 4-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #152
+; BE-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-NEON-NEXT:    mov r5, r6
+; BE-NEON-NEXT:    vmov.32 d8[1], r1
+; BE-NEON-NEXT:    vrev64.32 q8, q4
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT:    vorr d0, d11, d11
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d10, d10
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    add lr, sp, #168
+; BE-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-NEON-NEXT:    vmov.32 d8[1], r1
+; BE-NEON-NEXT:    vrev64.32 q8, q4
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT:    vorr d0, d11, d11
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d10, d10
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    add lr, sp, #184
+; BE-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-NEON-NEXT:    vmov.32 d8[1], r1
+; BE-NEON-NEXT:    vrev64.32 q8, q4
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEON-NEXT:    vorr d0, d11, d11
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d10, d10
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    add r0, sp, #464
+; BE-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-NEON-NEXT:    vmov.32 d8[1], r1
+; BE-NEON-NEXT:    vrev64.32 q8, q4
+; BE-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-NEON-NEXT:    vorr d0, d9, d9
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d8, d8
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    add r0, sp, #480
+; BE-NEON-NEXT:    add r5, r6, #192
+; BE-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-NEON-NEXT:    vorr d0, d9, d9
+; BE-NEON-NEXT:    vrev64.32 q8, q5
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d8, d8
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    add r0, sp, #496
+; BE-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-NEON-NEXT:    vorr d0, d9, d9
+; BE-NEON-NEXT:    vrev64.32 q8, q5
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vorr d0, d8, d8
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    bl llrint
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    add lr, sp, #112
+; BE-NEON-NEXT:    add r0, r6, #128
+; BE-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-NEON-NEXT:    vrev64.32 q8, q5
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEON-NEXT:    vst1.64 {d14, d15}, [r5:128]
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #200
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #216
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #96
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #80
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-NEON-NEXT:    add r0, r6, #64
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #64
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    add lr, sp, #48
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]
+; BE-NEON-NEXT:    add sp, sp, #232
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    add sp, sp, #4
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double> %x)
+  ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>)
+
+define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
+; LE-LABEL: llrint_v1i64_v1f128:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r11, lr}
+; LE-NEXT:    push {r11, lr}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    vmov.32 d0[0], r0
+; LE-NEXT:    vmov.32 d0[1], r1
+; LE-NEXT:    pop {r11, pc}
+;
+; LE-NEON-LABEL: llrint_v1i64_v1f128:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r11, lr}
+; LE-NEON-NEXT:    push {r11, lr}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    vmov.32 d0[0], r0
+; LE-NEON-NEXT:    vmov.32 d0[1], r1
+; LE-NEON-NEXT:    pop {r11, pc}
+;
+; BE-LABEL: llrint_v1i64_v1f128:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r11, lr}
+; BE-NEXT:    push {r11, lr}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 d0, d16
+; BE-NEXT:    pop {r11, pc}
+;
+; BE-NEON-LABEL: llrint_v1i64_v1f128:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r11, lr}
+; BE-NEON-NEXT:    push {r11, lr}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-NEON-NEXT:    vrev64.32 d0, d16
+; BE-NEON-NEXT:    pop {r11, pc}
+  %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x)
+  ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>)
+
+define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
+; LE-LABEL: llrint_v2i64_v2f128:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; LE-NEXT:    .vsave {d8, d9}
+; LE-NEXT:    vpush {d8, d9}
+; LE-NEXT:    mov r8, r3
+; LE-NEXT:    add r3, sp, #40
+; LE-NEXT:    mov r5, r2
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    mov r7, r0
+; LE-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    mov r0, r7
+; LE-NEXT:    mov r1, r6
+; LE-NEXT:    mov r2, r5
+; LE-NEXT:    mov r3, r8
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    vmov.32 d9[1], r4
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vorr q0, q4, q4
+; LE-NEXT:    vpop {d8, d9}
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; LE-NEON-LABEL: llrint_v2i64_v2f128:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; LE-NEON-NEXT:    .vsave {d8, d9}
+; LE-NEON-NEXT:    vpush {d8, d9}
+; LE-NEON-NEXT:    mov r8, r3
+; LE-NEON-NEXT:    add r3, sp, #40
+; LE-NEON-NEXT:    mov r5, r2
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    mov r7, r0
+; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    mov r0, r7
+; LE-NEON-NEXT:    mov r1, r6
+; LE-NEON-NEXT:    mov r2, r5
+; LE-NEON-NEXT:    mov r3, r8
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    vmov.32 d9[1], r4
+; LE-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-NEON-NEXT:    vorr q0, q4, q4
+; LE-NEON-NEXT:    vpop {d8, d9}
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; BE-LABEL: llrint_v2i64_v2f128:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; BE-NEXT:    .vsave {d8}
+; BE-NEXT:    vpush {d8}
+; BE-NEXT:    mov r8, r3
+; BE-NEXT:    add r3, sp, #32
+; BE-NEXT:    mov r5, r2
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    mov r7, r0
+; BE-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    mov r0, r7
+; BE-NEXT:    mov r1, r6
+; BE-NEXT:    mov r2, r5
+; BE-NEXT:    mov r3, r8
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov.32 d8[1], r4
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 d1, d8
+; BE-NEXT:    vrev64.32 d0, d16
+; BE-NEXT:    vpop {d8}
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; BE-NEON-LABEL: llrint_v2i64_v2f128:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; BE-NEON-NEXT:    .vsave {d8}
+; BE-NEON-NEXT:    vpush {d8}
+; BE-NEON-NEXT:    mov r8, r3
+; BE-NEON-NEXT:    add r3, sp, #32
+; BE-NEON-NEXT:    mov r5, r2
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    mov r7, r0
+; BE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    mov r0, r7
+; BE-NEON-NEXT:    mov r1, r6
+; BE-NEON-NEXT:    mov r2, r5
+; BE-NEON-NEXT:    mov r3, r8
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov.32 d8[1], r4
+; BE-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-NEON-NEXT:    vrev64.32 d1, d8
+; BE-NEON-NEXT:    vrev64.32 d0, d16
+; BE-NEON-NEXT:    vpop {d8}
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+  %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x)
+  ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>)
+
+define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
+; LE-LABEL: llrint_v4i64_v4f128:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-NEXT:    vpush {d8, d9, d10, d11}
+; LE-NEXT:    mov r5, r3
+; LE-NEXT:    add r3, sp, #96
+; LE-NEXT:    mov r7, r2
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    mov r4, r0
+; LE-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    mov r0, r4
+; LE-NEXT:    mov r1, r6
+; LE-NEXT:    mov r2, r7
+; LE-NEXT:    mov r3, r5
+; LE-NEXT:    ldr r8, [sp, #80]
+; LE-NEXT:    ldr r10, [sp, #64]
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #68
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    mov r0, r10
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #84
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    mov r0, r8
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    vmov.32 d11[1], r4
+; LE-NEXT:    vmov.32 d9[1], r9
+; LE-NEXT:    vmov.32 d10[1], r5
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vorr q0, q5, q5
+; LE-NEXT:    vorr q1, q4, q4
+; LE-NEXT:    vpop {d8, d9, d10, d11}
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-NEON-LABEL: llrint_v4i64_v4f128:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11}
+; LE-NEON-NEXT:    mov r5, r3
+; LE-NEON-NEXT:    add r3, sp, #96
+; LE-NEON-NEXT:    mov r7, r2
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    mov r4, r0
+; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    mov r0, r4
+; LE-NEON-NEXT:    mov r1, r6
+; LE-NEON-NEXT:    mov r2, r7
+; LE-NEON-NEXT:    mov r3, r5
+; LE-NEON-NEXT:    ldr r8, [sp, #80]
+; LE-NEON-NEXT:    ldr r10, [sp, #64]
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #68
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    mov r0, r10
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #84
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    mov r0, r8
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    vmov.32 d11[1], r4
+; LE-NEON-NEXT:    vmov.32 d9[1], r9
+; LE-NEON-NEXT:    vmov.32 d10[1], r5
+; LE-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-NEON-NEXT:    vorr q0, q5, q5
+; LE-NEON-NEXT:    vorr q1, q4, q4
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11}
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-LABEL: llrint_v4i64_v4f128:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT:    .vsave {d8, d9, d10}
+; BE-NEXT:    vpush {d8, d9, d10}
+; BE-NEXT:    mov r5, r3
+; BE-NEXT:    add r3, sp, #88
+; BE-NEXT:    mov r7, r2
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    mov r4, r0
+; BE-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    mov r0, r4
+; BE-NEXT:    mov r1, r6
+; BE-NEXT:    mov r2, r7
+; BE-NEXT:    mov r3, r5
+; BE-NEXT:    ldr r8, [sp, #72]
+; BE-NEXT:    ldr r10, [sp, #56]
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #60
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    mov r0, r10
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #76
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    mov r0, r8
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov.32 d10[1], r4
+; BE-NEXT:    vmov.32 d8[1], r9
+; BE-NEXT:    vmov.32 d9[1], r5
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 d1, d10
+; BE-NEXT:    vrev64.32 d3, d8
+; BE-NEXT:    vrev64.32 d0, d9
+; BE-NEXT:    vrev64.32 d2, d16
+; BE-NEXT:    vpop {d8, d9, d10}
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-NEON-LABEL: llrint_v4i64_v4f128:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEON-NEXT:    .vsave {d8, d9, d10}
+; BE-NEON-NEXT:    vpush {d8, d9, d10}
+; BE-NEON-NEXT:    mov r5, r3
+; BE-NEON-NEXT:    add r3, sp, #88
+; BE-NEON-NEXT:    mov r7, r2
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    mov r4, r0
+; BE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    mov r9, r1
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    mov r0, r4
+; BE-NEON-NEXT:    mov r1, r6
+; BE-NEON-NEXT:    mov r2, r7
+; BE-NEON-NEXT:    mov r3, r5
+; BE-NEON-NEXT:    ldr r8, [sp, #72]
+; BE-NEON-NEXT:    ldr r10, [sp, #56]
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #60
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    mov r0, r10
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #76
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    mov r0, r8
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    vmov.32 d10[1], r4
+; BE-NEON-NEXT:    vmov.32 d8[1], r9
+; BE-NEON-NEXT:    vmov.32 d9[1], r5
+; BE-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-NEON-NEXT:    vrev64.32 d1, d10
+; BE-NEON-NEXT:    vrev64.32 d3, d8
+; BE-NEON-NEXT:    vrev64.32 d0, d9
+; BE-NEON-NEXT:    vrev64.32 d2, d16
+; BE-NEON-NEXT:    vpop {d8, d9, d10}
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+  %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x)
+  ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>)
+
+define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
+; LE-LABEL: llrint_v8i64_v8f128:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    .pad #4
+; LE-NEXT:    sub sp, sp, #4
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    .pad #8
+; LE-NEXT:    sub sp, sp, #8
+; LE-NEXT:    mov r11, r3
+; LE-NEXT:    add r3, sp, #208
+; LE-NEXT:    mov r10, r2
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    mov r5, r0
+; LE-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r7, sp, #164
+; LE-NEXT:    ldr r6, [sp, #160]
+; LE-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    ldm r7, {r1, r2, r3, r7}
+; LE-NEXT:    mov r0, r6
+; LE-NEXT:    ldr r8, [sp, #128]
+; LE-NEXT:    ldr r9, [sp, #144]
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #180
+; LE-NEXT:    str r1, [sp] @ 4-byte Spill
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    mov r0, r7
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #132
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    mov r0, r8
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #148
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    mov r0, r9
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    mov r0, r5
+; LE-NEXT:    mov r1, r4
+; LE-NEXT:    mov r2, r10
+; LE-NEXT:    mov r3, r11
+; LE-NEXT:    ldr r6, [sp, #112]
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #116
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    mov r0, r6
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #196
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    ldr r0, [sp, #192]
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d11[1], r7
+; LE-NEXT:    vmov.32 d10[1], r0
+; LE-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d15[1], r5
+; LE-NEXT:    vorr q2, q5, q5
+; LE-NEXT:    vmov.32 d13[1], r9
+; LE-NEXT:    vmov.32 d9[1], r0
+; LE-NEXT:    vmov.32 d14[1], r4
+; LE-NEXT:    vmov.32 d12[1], r8
+; LE-NEXT:    vorr q0, q7, q7
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vorr q1, q6, q6
+; LE-NEXT:    vorr q3, q4, q4
+; LE-NEXT:    add sp, sp, #8
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    add sp, sp, #4
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v8i64_v8f128:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    .pad #4
+; LE-NEON-NEXT:    sub sp, sp, #4
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    .pad #8
+; LE-NEON-NEXT:    sub sp, sp, #8
+; LE-NEON-NEXT:    mov r11, r3
+; LE-NEON-NEXT:    add r3, sp, #208
+; LE-NEON-NEXT:    mov r10, r2
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    mov r5, r0
+; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r7, sp, #164
+; LE-NEON-NEXT:    ldr r6, [sp, #160]
+; LE-NEON-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    ldm r7, {r1, r2, r3, r7}
+; LE-NEON-NEXT:    mov r0, r6
+; LE-NEON-NEXT:    ldr r8, [sp, #128]
+; LE-NEON-NEXT:    ldr r9, [sp, #144]
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #180
+; LE-NEON-NEXT:    str r1, [sp] @ 4-byte Spill
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    mov r0, r7
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #132
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    mov r0, r8
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #148
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    mov r0, r9
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    mov r0, r5
+; LE-NEON-NEXT:    mov r1, r4
+; LE-NEON-NEXT:    mov r2, r10
+; LE-NEON-NEXT:    mov r3, r11
+; LE-NEON-NEXT:    ldr r6, [sp, #112]
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #116
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    mov r0, r6
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #196
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #192]
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov.32 d11[1], r7
+; LE-NEON-NEXT:    vmov.32 d10[1], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov.32 d15[1], r5
+; LE-NEON-NEXT:    vorr q2, q5, q5
+; LE-NEON-NEXT:    vmov.32 d13[1], r9
+; LE-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-NEON-NEXT:    vmov.32 d14[1], r4
+; LE-NEON-NEXT:    vmov.32 d12[1], r8
+; LE-NEON-NEXT:    vorr q0, q7, q7
+; LE-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-NEON-NEXT:    vorr q1, q6, q6
+; LE-NEON-NEXT:    vorr q3, q4, q4
+; LE-NEON-NEXT:    add sp, sp, #8
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    add sp, sp, #4
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v8i64_v8f128:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    .pad #4
+; BE-NEXT:    sub sp, sp, #4
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEXT:    .pad #16
+; BE-NEXT:    sub sp, sp, #16
+; BE-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; BE-NEXT:    add r3, sp, #208
+; BE-NEXT:    mov r11, r2
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    mov r5, r0
+; BE-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    ldr r7, [sp, #176]
+; BE-NEXT:    add r3, sp, #180
+; BE-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    mov r0, r7
+; BE-NEXT:    ldr r6, [sp, #128]
+; BE-NEXT:    ldr r8, [sp, #144]
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #132
+; BE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    mov r0, r6
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #148
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    mov r0, r8
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #160
+; BE-NEXT:    mov r9, r0
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    mov r0, r5
+; BE-NEXT:    mov r1, r4
+; BE-NEXT:    mov r2, r11
+; BE-NEXT:    ldr r10, [sp, #112]
+; BE-NEXT:    vmov.32 d12[0], r9
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #116
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    mov r0, r10
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #196
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    ldr r0, [sp, #192]
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; BE-NEXT:    vmov.32 d14[1], r5
+; BE-NEXT:    vmov.32 d9[1], r0
+; BE-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; BE-NEXT:    vmov.32 d12[1], r7
+; BE-NEXT:    vmov.32 d8[1], r0
+; BE-NEXT:    vmov.32 d13[1], r4
+; BE-NEXT:    vmov.32 d10[1], r6
+; BE-NEXT:    vmov.32 d11[1], r8
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 d1, d14
+; BE-NEXT:    vrev64.32 d3, d12
+; BE-NEXT:    vrev64.32 d5, d9
+; BE-NEXT:    vrev64.32 d7, d8
+; BE-NEXT:    vrev64.32 d0, d13
+; BE-NEXT:    vrev64.32 d2, d10
+; BE-NEXT:    vrev64.32 d4, d11
+; BE-NEXT:    vrev64.32 d6, d16
+; BE-NEXT:    add sp, sp, #16
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEXT:    add sp, sp, #4
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v8i64_v8f128:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    .pad #4
+; BE-NEON-NEXT:    sub sp, sp, #4
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEON-NEXT:    .pad #16
+; BE-NEON-NEXT:    sub sp, sp, #16
+; BE-NEON-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; BE-NEON-NEXT:    add r3, sp, #208
+; BE-NEON-NEXT:    mov r11, r2
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    mov r5, r0
+; BE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    ldr r7, [sp, #176]
+; BE-NEON-NEXT:    add r3, sp, #180
+; BE-NEON-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    mov r0, r7
+; BE-NEON-NEXT:    ldr r6, [sp, #128]
+; BE-NEON-NEXT:    ldr r8, [sp, #144]
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #132
+; BE-NEON-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    mov r0, r6
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #148
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    mov r0, r8
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #160
+; BE-NEON-NEXT:    mov r9, r0
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    mov r0, r5
+; BE-NEON-NEXT:    mov r1, r4
+; BE-NEON-NEXT:    mov r2, r11
+; BE-NEON-NEXT:    ldr r10, [sp, #112]
+; BE-NEON-NEXT:    vmov.32 d12[0], r9
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #116
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    mov r0, r10
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #196
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #192]
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov.32 d14[1], r5
+; BE-NEON-NEXT:    vmov.32 d9[1], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov.32 d12[1], r7
+; BE-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-NEON-NEXT:    vmov.32 d13[1], r4
+; BE-NEON-NEXT:    vmov.32 d10[1], r6
+; BE-NEON-NEXT:    vmov.32 d11[1], r8
+; BE-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-NEON-NEXT:    vrev64.32 d1, d14
+; BE-NEON-NEXT:    vrev64.32 d3, d12
+; BE-NEON-NEXT:    vrev64.32 d5, d9
+; BE-NEON-NEXT:    vrev64.32 d7, d8
+; BE-NEON-NEXT:    vrev64.32 d0, d13
+; BE-NEON-NEXT:    vrev64.32 d2, d10
+; BE-NEON-NEXT:    vrev64.32 d4, d11
+; BE-NEON-NEXT:    vrev64.32 d6, d16
+; BE-NEON-NEXT:    add sp, sp, #16
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEON-NEXT:    add sp, sp, #4
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x)
+  ret <8 x i64> %a
+}
+declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>)
+
+define <16 x i64> @llrint_v16f128(<16 x fp128> %x) {
+; LE-LABEL: llrint_v16f128:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    .pad #4
+; LE-NEXT:    sub sp, sp, #4
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    .pad #72
+; LE-NEXT:    sub sp, sp, #72
+; LE-NEXT:    mov r6, r3
+; LE-NEXT:    add r3, sp, #408
+; LE-NEXT:    mov r7, r2
+; LE-NEXT:    mov r4, r0
+; LE-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r5, sp, #176
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    mov r0, r7
+; LE-NEXT:    ldm r5, {r2, r3, r5}
+; LE-NEXT:    mov r1, r6
+; LE-NEXT:    ldr r8, [sp, #232]
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #188
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    mov r0, r5
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #236
+; LE-NEXT:    mov r11, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    mov r0, r8
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #252
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    ldr r0, [sp, #248]
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #268
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    ldr r0, [sp, #264]
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #284
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    ldr r0, [sp, #280]
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #316
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    ldr r0, [sp, #312]
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    vmov.32 d15[1], r5
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    ldr r5, [sp, #300]
+; LE-NEXT:    vmov.32 d14[1], r7
+; LE-NEXT:    ldr r2, [sp, #304]
+; LE-NEXT:    ldr r3, [sp, #308]
+; LE-NEXT:    vmov.32 d11[1], r6
+; LE-NEXT:    ldr r6, [sp, #200]
+; LE-NEXT:    ldr r7, [sp, #204]
+; LE-NEXT:    vmov.32 d10[1], r8
+; LE-NEXT:    ldr r8, [sp, #344]
+; LE-NEXT:    vmov.32 d9[1], r11
+; LE-NEXT:    ldr r11, [sp, #216]
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #40
+; LE-NEXT:    vmov.32 d17[0], r0
+; LE-NEXT:    ldr r0, [sp, #296]
+; LE-NEXT:    vmov.32 d8[1], r9
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    vorr q5, q8, q8
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    vorr q4, q6, q6
+; LE-NEXT:    vmov.32 d11[1], r1
+; LE-NEXT:    mov r1, r5
+; LE-NEXT:    vmov.32 d9[1], r10
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    ldr r2, [sp, #208]
+; LE-NEXT:    ldr r3, [sp, #212]
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    mov r0, r6
+; LE-NEXT:    mov r1, r7
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #220
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    mov r0, r11
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #348
+; LE-NEXT:    mov r11, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    mov r0, r8
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #364
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    ldr r0, [sp, #360]
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #380
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    ldr r0, [sp, #376]
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #396
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    ldr r0, [sp, #392]
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #332
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    ldr r0, [sp, #328]
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    add r0, r4, #64
+; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    vmov.32 d13[1], r8
+; LE-NEXT:    vmov.32 d18[1], r9
+; LE-NEXT:    vmov.32 d15[1], r6
+; LE-NEXT:    vmov.32 d12[1], r1
+; LE-NEXT:    vmov.32 d14[1], r5
+; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-NEXT:    vmov.32 d8[1], r7
+; LE-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-NEXT:    vst1.64 {d8, d9}, [r0:128]
+; LE-NEXT:    vmov.32 d11[1], r11
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #40
+; LE-NEXT:    vmov.32 d10[1], r10
+; LE-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-NEXT:    vst1.64 {d10, d11}, [r4:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #56
+; LE-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vst1.64 {d16, d17}, [r4:128]
+; LE-NEXT:    add sp, sp, #72
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    add sp, sp, #4
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v16f128:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    .pad #4
+; LE-NEON-NEXT:    sub sp, sp, #4
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    .pad #72
+; LE-NEON-NEXT:    sub sp, sp, #72
+; LE-NEON-NEXT:    mov r6, r3
+; LE-NEON-NEXT:    add r3, sp, #408
+; LE-NEON-NEXT:    mov r7, r2
+; LE-NEON-NEXT:    mov r4, r0
+; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r5, sp, #176
+; LE-NEON-NEXT:    mov r10, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    mov r0, r7
+; LE-NEON-NEXT:    ldm r5, {r2, r3, r5}
+; LE-NEON-NEXT:    mov r1, r6
+; LE-NEON-NEXT:    ldr r8, [sp, #232]
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #188
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    mov r0, r5
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #236
+; LE-NEON-NEXT:    mov r11, r1
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    mov r0, r8
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #252
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #248]
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #268
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #264]
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #284
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #280]
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #316
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #312]
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    vmov.32 d15[1], r5
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    ldr r5, [sp, #300]
+; LE-NEON-NEXT:    vmov.32 d14[1], r7
+; LE-NEON-NEXT:    ldr r2, [sp, #304]
+; LE-NEON-NEXT:    ldr r3, [sp, #308]
+; LE-NEON-NEXT:    vmov.32 d11[1], r6
+; LE-NEON-NEXT:    ldr r6, [sp, #200]
+; LE-NEON-NEXT:    ldr r7, [sp, #204]
+; LE-NEON-NEXT:    vmov.32 d10[1], r8
+; LE-NEON-NEXT:    ldr r8, [sp, #344]
+; LE-NEON-NEXT:    vmov.32 d9[1], r11
+; LE-NEON-NEXT:    ldr r11, [sp, #216]
+; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #40
+; LE-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #296]
+; LE-NEON-NEXT:    vmov.32 d8[1], r9
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    vorr q5, q8, q8
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    vorr q4, q6, q6
+; LE-NEON-NEXT:    vmov.32 d11[1], r1
+; LE-NEON-NEXT:    mov r1, r5
+; LE-NEON-NEXT:    vmov.32 d9[1], r10
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    ldr r2, [sp, #208]
+; LE-NEON-NEXT:    ldr r3, [sp, #212]
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    mov r9, r1
+; LE-NEON-NEXT:    mov r0, r6
+; LE-NEON-NEXT:    mov r1, r7
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #220
+; LE-NEON-NEXT:    mov r10, r1
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    mov r0, r11
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #348
+; LE-NEON-NEXT:    mov r11, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    mov r0, r8
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #364
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #360]
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #380
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #376]
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #396
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #392]
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #332
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #328]
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    add r0, r4, #64
+; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #24
+; LE-NEON-NEXT:    vmov.32 d13[1], r8
+; LE-NEON-NEXT:    vmov.32 d18[1], r9
+; LE-NEON-NEXT:    vmov.32 d15[1], r6
+; LE-NEON-NEXT:    vmov.32 d12[1], r1
+; LE-NEON-NEXT:    vmov.32 d14[1], r5
+; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-NEON-NEXT:    vmov.32 d8[1], r7
+; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]
+; LE-NEON-NEXT:    vmov.32 d11[1], r11
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #40
+; LE-NEON-NEXT:    vmov.32 d10[1], r10
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r4:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #56
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]
+; LE-NEON-NEXT:    add sp, sp, #72
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    add sp, sp, #4
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v16f128:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    .pad #4
+; BE-NEXT:    sub sp, sp, #4
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    .pad #56
+; BE-NEXT:    sub sp, sp, #56
+; BE-NEXT:    mov r5, r3
+; BE-NEXT:    add r3, sp, #376
+; BE-NEXT:    mov r6, r2
+; BE-NEXT:    mov r4, r0
+; BE-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    ldr r7, [sp, #392]
+; BE-NEXT:    add r3, sp, #396
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    mov r0, r7
+; BE-NEXT:    ldr r11, [sp, #168]
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    ldr r2, [sp, #160]
+; BE-NEXT:    mov r10, r1
+; BE-NEXT:    ldr r3, [sp, #164]
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    mov r0, r6
+; BE-NEXT:    mov r1, r5
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #172
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    mov r0, r11
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #220
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    ldr r0, [sp, #216]
+; BE-NEXT:    mov r11, r1
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #236
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    ldr r0, [sp, #232]
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #252
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    ldr r0, [sp, #248]
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #268
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    ldr r0, [sp, #264]
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    ldr r0, [sp, #280]
+; BE-NEXT:    ldr r2, [sp, #288]
+; BE-NEXT:    vmov.32 d13[1], r7
+; BE-NEXT:    ldr r7, [sp, #284]
+; BE-NEXT:    ldr r3, [sp, #292]
+; BE-NEXT:    vmov.32 d14[1], r5
+; BE-NEXT:    ldr r5, [sp, #328]
+; BE-NEXT:    vmov.32 d12[1], r6
+; BE-NEXT:    ldr r6, [sp, #300]
+; BE-NEXT:    vmov.32 d10[1], r8
+; BE-NEXT:    ldr r8, [sp, #184]
+; BE-NEXT:    vmov.32 d11[1], r11
+; BE-NEXT:    vmov.32 d9[1], r10
+; BE-NEXT:    vmov.32 d8[1], r9
+; BE-NEXT:    vmov.32 d15[1], r1
+; BE-NEXT:    mov r1, r7
+; BE-NEXT:    vstr d14, [sp, #48] @ 8-byte Spill
+; BE-NEXT:    vstr d13, [sp, #40] @ 8-byte Spill
+; BE-NEXT:    vstr d12, [sp, #32] @ 8-byte Spill
+; BE-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
+; BE-NEXT:    vstr d10, [sp, #16] @ 8-byte Spill
+; BE-NEXT:    vstr d9, [sp, #8] @ 8-byte Spill
+; BE-NEXT:    vstr d8, [sp] @ 8-byte Spill
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    mov r10, r1
+; BE-NEXT:    ldr r1, [sp, #296]
+; BE-NEXT:    ldr r2, [sp, #304]
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    ldr r3, [sp, #308]
+; BE-NEXT:    mov r0, r1
+; BE-NEXT:    mov r1, r6
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #332
+; BE-NEXT:    mov r11, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    mov r0, r5
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #188
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    mov r0, r8
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #204
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    ldr r0, [sp, #200]
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #348
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    ldr r0, [sp, #344]
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #364
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    ldr r0, [sp, #360]
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #316
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    ldr r0, [sp, #312]
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    vldr d18, [sp, #48] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d17, d15
+; BE-NEXT:    vrev64.32 d16, d18
+; BE-NEXT:    vldr d18, [sp, #40] @ 8-byte Reload
+; BE-NEXT:    vmov.32 d24[0], r0
+; BE-NEXT:    add r0, r4, #64
+; BE-NEXT:    vldr d20, [sp, #32] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d19, d18
+; BE-NEXT:    vmov.32 d9[1], r11
+; BE-NEXT:    vmov.32 d10[1], r7
+; BE-NEXT:    vrev64.32 d18, d20
+; BE-NEXT:    vldr d20, [sp, #24] @ 8-byte Reload
+; BE-NEXT:    vmov.32 d8[1], r10
+; BE-NEXT:    vmov.32 d14[1], r6
+; BE-NEXT:    vmov.32 d24[1], r1
+; BE-NEXT:    vldr d22, [sp, #16] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d21, d20
+; BE-NEXT:    vrev64.32 d1, d9
+; BE-NEXT:    vmov.32 d13[1], r9
+; BE-NEXT:    vrev64.32 d31, d10
+; BE-NEXT:    vrev64.32 d20, d22
+; BE-NEXT:    vldr d22, [sp, #8] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d0, d8
+; BE-NEXT:    vrev64.32 d29, d14
+; BE-NEXT:    vmov.32 d12[1], r5
+; BE-NEXT:    vrev64.32 d30, d24
+; BE-NEXT:    vrev64.32 d27, d22
+; BE-NEXT:    vldr d22, [sp] @ 8-byte Reload
+; BE-NEXT:    vst1.64 {d0, d1}, [r0:128]!
+; BE-NEXT:    vmov.32 d11[1], r8
+; BE-NEXT:    vrev64.32 d28, d13
+; BE-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-NEXT:    vrev64.32 d26, d22
+; BE-NEXT:    vrev64.32 d23, d12
+; BE-NEXT:    vst1.64 {d28, d29}, [r0:128]!
+; BE-NEXT:    vrev64.32 d22, d11
+; BE-NEXT:    vst1.64 {d26, d27}, [r0:128]
+; BE-NEXT:    vst1.64 {d20, d21}, [r4:128]!
+; BE-NEXT:    vst1.64 {d22, d23}, [r4:128]!
+; BE-NEXT:    vst1.64 {d18, d19}, [r4:128]!
+; BE-NEXT:    vst1.64 {d16, d17}, [r4:128]
+; BE-NEXT:    add sp, sp, #56
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    add sp, sp, #4
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v16f128:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    .pad #4
+; BE-NEON-NEXT:    sub sp, sp, #4
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    .pad #56
+; BE-NEON-NEXT:    sub sp, sp, #56
+; BE-NEON-NEXT:    mov r5, r3
+; BE-NEON-NEXT:    add r3, sp, #376
+; BE-NEON-NEXT:    mov r6, r2
+; BE-NEON-NEXT:    mov r4, r0
+; BE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    ldr r7, [sp, #392]
+; BE-NEON-NEXT:    add r3, sp, #396
+; BE-NEON-NEXT:    mov r9, r1
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    mov r0, r7
+; BE-NEON-NEXT:    ldr r11, [sp, #168]
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    ldr r2, [sp, #160]
+; BE-NEON-NEXT:    mov r10, r1
+; BE-NEON-NEXT:    ldr r3, [sp, #164]
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    mov r0, r6
+; BE-NEON-NEXT:    mov r1, r5
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #172
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    mov r0, r11
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #220
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #216]
+; BE-NEON-NEXT:    mov r11, r1
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #236
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #232]
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #252
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #248]
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #268
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #264]
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #280]
+; BE-NEON-NEXT:    ldr r2, [sp, #288]
+; BE-NEON-NEXT:    vmov.32 d13[1], r7
+; BE-NEON-NEXT:    ldr r7, [sp, #284]
+; BE-NEON-NEXT:    ldr r3, [sp, #292]
+; BE-NEON-NEXT:    vmov.32 d14[1], r5
+; BE-NEON-NEXT:    ldr r5, [sp, #328]
+; BE-NEON-NEXT:    vmov.32 d12[1], r6
+; BE-NEON-NEXT:    ldr r6, [sp, #300]
+; BE-NEON-NEXT:    vmov.32 d10[1], r8
+; BE-NEON-NEXT:    ldr r8, [sp, #184]
+; BE-NEON-NEXT:    vmov.32 d11[1], r11
+; BE-NEON-NEXT:    vmov.32 d9[1], r10
+; BE-NEON-NEXT:    vmov.32 d8[1], r9
+; BE-NEON-NEXT:    vmov.32 d15[1], r1
+; BE-NEON-NEXT:    mov r1, r7
+; BE-NEON-NEXT:    vstr d14, [sp, #48] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d13, [sp, #40] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d12, [sp, #32] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d10, [sp, #16] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d9, [sp, #8] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d8, [sp] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    mov r10, r1
+; BE-NEON-NEXT:    ldr r1, [sp, #296]
+; BE-NEON-NEXT:    ldr r2, [sp, #304]
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    ldr r3, [sp, #308]
+; BE-NEON-NEXT:    mov r0, r1
+; BE-NEON-NEXT:    mov r1, r6
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #332
+; BE-NEON-NEXT:    mov r11, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    mov r0, r5
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #188
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    mov r0, r8
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #204
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #200]
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #348
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #344]
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #364
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #360]
+; BE-NEON-NEXT:    mov r9, r1
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #316
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #312]
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    vldr d18, [sp, #48] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d17, d15
+; BE-NEON-NEXT:    vrev64.32 d16, d18
+; BE-NEON-NEXT:    vldr d18, [sp, #40] @ 8-byte Reload
+; BE-NEON-NEXT:    vmov.32 d24[0], r0
+; BE-NEON-NEXT:    add r0, r4, #64
+; BE-NEON-NEXT:    vldr d20, [sp, #32] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d19, d18
+; BE-NEON-NEXT:    vmov.32 d9[1], r11
+; BE-NEON-NEXT:    vmov.32 d10[1], r7
+; BE-NEON-NEXT:    vrev64.32 d18, d20
+; BE-NEON-NEXT:    vldr d20, [sp, #24] @ 8-byte Reload
+; BE-NEON-NEXT:    vmov.32 d8[1], r10
+; BE-NEON-NEXT:    vmov.32 d14[1], r6
+; BE-NEON-NEXT:    vmov.32 d24[1], r1
+; BE-NEON-NEXT:    vldr d22, [sp, #16] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d21, d20
+; BE-NEON-NEXT:    vrev64.32 d1, d9
+; BE-NEON-NEXT:    vmov.32 d13[1], r9
+; BE-NEON-NEXT:    vrev64.32 d31, d10
+; BE-NEON-NEXT:    vrev64.32 d20, d22
+; BE-NEON-NEXT:    vldr d22, [sp, #8] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d0, d8
+; BE-NEON-NEXT:    vrev64.32 d29, d14
+; BE-NEON-NEXT:    vmov.32 d12[1], r5
+; BE-NEON-NEXT:    vrev64.32 d30, d24
+; BE-NEON-NEXT:    vrev64.32 d27, d22
+; BE-NEON-NEXT:    vldr d22, [sp] @ 8-byte Reload
+; BE-NEON-NEXT:    vst1.64 {d0, d1}, [r0:128]!
+; BE-NEON-NEXT:    vmov.32 d11[1], r8
+; BE-NEON-NEXT:    vrev64.32 d28, d13
+; BE-NEON-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-NEON-NEXT:    vrev64.32 d26, d22
+; BE-NEON-NEXT:    vrev64.32 d23, d12
+; BE-NEON-NEXT:    vst1.64 {d28, d29}, [r0:128]!
+; BE-NEON-NEXT:    vrev64.32 d22, d11
+; BE-NEON-NEXT:    vst1.64 {d26, d27}, [r0:128]
+; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r4:128]!
+; BE-NEON-NEXT:    vst1.64 {d22, d23}, [r4:128]!
+; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r4:128]!
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]
+; BE-NEON-NEXT:    add sp, sp, #56
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    add sp, sp, #4
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128> %x)
+  ret <16 x i64> %a
+}
+declare <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128>)
+
+define <32 x i64> @llrint_v32f128(<32 x fp128> %x) {
+; LE-LABEL: llrint_v32f128:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEXT:    .pad #4
+; LE-NEXT:    sub sp, sp, #4
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    .pad #192
+; LE-NEXT:    sub sp, sp, #192
+; LE-NEXT:    str r3, [sp, #60] @ 4-byte Spill
+; LE-NEXT:    add r3, sp, #688
+; LE-NEXT:    str r2, [sp, #56] @ 4-byte Spill
+; LE-NEXT:    mov r9, r0
+; LE-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #560
+; LE-NEXT:    mov r4, r0
+; LE-NEXT:    str r1, [sp, #64] @ 4-byte Spill
+; LE-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    vmov.32 d17[0], r0
+; LE-NEXT:    ldr r7, [sp, #544]
+; LE-NEXT:    ldr r6, [sp, #548]
+; LE-NEXT:    add lr, sp, #96
+; LE-NEXT:    ldr r2, [sp, #552]
+; LE-NEXT:    vmov.32 d17[1], r1
+; LE-NEXT:    ldr r3, [sp, #556]
+; LE-NEXT:    mov r0, r7
+; LE-NEXT:    mov r1, r6
+; LE-NEXT:    vorr q4, q8, q8
+; LE-NEXT:    ldr r5, [sp, #528]
+; LE-NEXT:    vmov.32 d17[0], r4
+; LE-NEXT:    ldr r10, [sp, #304]
+; LE-NEXT:    ldr r8, [sp, #368]
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #532
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    mov r11, r1
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    mov r0, r5
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #308
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    vmov.32 d17[0], r0
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    mov r0, r10
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #372
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    mov r0, r8
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #404
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    ldr r0, [sp, #400]
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #596
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    ldr r0, [sp, #592]
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #676
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    ldr r0, [sp, #672]
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add lr, sp, #96
+; LE-NEXT:    vmov.32 d13[1], r4
+; LE-NEXT:    str r1, [sp, #52] @ 4-byte Spill
+; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #80
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #128
+; LE-NEXT:    vmov.32 d9[1], r7
+; LE-NEXT:    ldr r1, [sp, #628]
+; LE-NEXT:    ldr r2, [sp, #632]
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #112
+; LE-NEXT:    vmov.32 d15[1], r6
+; LE-NEXT:    ldr r3, [sp, #636]
+; LE-NEXT:    ldr r7, [sp, #64] @ 4-byte Reload
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    vmov.32 d11[1], r10
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d18[0], r0
+; LE-NEXT:    ldr r0, [sp, #624]
+; LE-NEXT:    vmov.32 d16[1], r11
+; LE-NEXT:    vmov.32 d9[1], r5
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #96
+; LE-NEXT:    vmov.32 d19[1], r7
+; LE-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #512
+; LE-NEXT:    str r0, [sp, #48] @ 4-byte Spill
+; LE-NEXT:    str r1, [sp, #64] @ 4-byte Spill
+; LE-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #768
+; LE-NEXT:    mov r11, r0
+; LE-NEXT:    str r1, [sp, #28] @ 4-byte Spill
+; LE-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    ldr r6, [sp, #784]
+; LE-NEXT:    add r3, sp, #788
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    mov r0, r6
+; LE-NEXT:    ldr r5, [sp, #736]
+; LE-NEXT:    ldr r7, [sp, #752]
+; LE-NEXT:    ldr r4, [sp, #720]
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #740
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    mov r0, r5
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #756
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    mov r0, r7
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #724
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    mov r0, r4
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    vmov.32 d13[1], r7
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    ldr r2, [sp, #296]
+; LE-NEXT:    vmov.32 d12[1], r5
+; LE-NEXT:    ldr r3, [sp, #300]
+; LE-NEXT:    ldr r4, [sp, #576]
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
+; LE-NEXT:    ldr r10, [sp, #384]
+; LE-NEXT:    vmov.32 d15[1], r6
+; LE-NEXT:    ldr r6, [sp, #352]
+; LE-NEXT:    vmov.32 d14[1], r8
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #32
+; LE-NEXT:    vmov.32 d11[1], r1
+; LE-NEXT:    ldr r1, [sp, #60] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d8[0], r11
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    add r3, sp, #356
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    vmov.32 d16[0], r0
+; LE-NEXT:    mov r0, r6
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add lr, sp, #112
+; LE-NEXT:    add r3, sp, #388
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    mov r0, r10
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add lr, sp, #128
+; LE-NEXT:    add r3, sp, #580
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    mov r0, r4
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add lr, sp, #80
+; LE-NEXT:    add r3, sp, #708
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    ldr r0, [sp, #704]
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    vmov.32 d8[1], r4
+; LE-NEXT:    add lr, sp, #80
+; LE-NEXT:    ldr r2, [sp, #52] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d12[1], r6
+; LE-NEXT:    ldr r6, [sp, #644]
+; LE-NEXT:    ldr r3, [sp, #652]
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #128
+; LE-NEXT:    vmov.32 d14[1], r7
+; LE-NEXT:    ldr r4, [sp, #480]
+; LE-NEXT:    ldr r7, [sp, #656]
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #112
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
+; LE-NEXT:    ldr r10, [sp, #496]
+; LE-NEXT:    vmov.32 d16[1], r5
+; LE-NEXT:    add r5, r9, #192
+; LE-NEXT:    ldr r8, [sp, #608]
+; LE-NEXT:    vmov.32 d10[1], r1
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d16[1], r0
+; LE-NEXT:    ldr r0, [sp, #640]
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #96
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    vmov.32 d16[1], r2
+; LE-NEXT:    ldr r2, [sp, #648]
+; LE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; LE-NEXT:    vst1.64 {d10, d11}, [r5:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; LE-NEXT:    ldr r1, [sp, #48] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d9[0], r1
+; LE-NEXT:    mov r1, r6
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #660
+; LE-NEXT:    mov r11, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    mov r0, r7
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #484
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    mov r0, r4
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #500
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    mov r0, r10
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #612
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    mov r0, r8
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    ldr r0, [sp, #64] @ 4-byte Reload
+; LE-NEXT:    add lr, sp, #96
+; LE-NEXT:    add r8, r9, #128
+; LE-NEXT:    vmov.32 d13[1], r7
+; LE-NEXT:    ldr r2, [sp, #344]
+; LE-NEXT:    ldr r3, [sp, #348]
+; LE-NEXT:    vmov.32 d12[1], r11
+; LE-NEXT:    ldr r7, [sp, #452]
+; LE-NEXT:    ldr r10, [sp, #416]
+; LE-NEXT:    vmov.32 d9[1], r0
+; LE-NEXT:    ldr r0, [sp, #336]
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #64
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #32
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    vmov.32 d11[1], r4
+; LE-NEXT:    ldr r4, [sp, #340]
+; LE-NEXT:    vst1.64 {d16, d17}, [r5:128]
+; LE-NEXT:    mov r1, r4
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #80
+; LE-NEXT:    vmov.32 d10[1], r6
+; LE-NEXT:    ldr r6, [sp, #448]
+; LE-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    ldr r2, [sp, #456]
+; LE-NEXT:    mov r11, r1
+; LE-NEXT:    ldr r3, [sp, #460]
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    mov r0, r6
+; LE-NEXT:    mov r1, r7
+; LE-NEXT:    ldr r5, [sp, #432]
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #468
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    ldr r0, [sp, #464]
+; LE-NEXT:    mov r6, r1
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #420
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    mov r0, r10
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #436
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    mov r0, r5
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #324
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    ldr r0, [sp, #320]
+; LE-NEXT:    mov r5, r1
+; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add lr, sp, #64
+; LE-NEXT:    vmov.32 d9[1], r5
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #96
+; LE-NEXT:    vmov.32 d13[1], r7
+; LE-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    vmov.32 d8[1], r4
+; LE-NEXT:    vmov.32 d12[1], r6
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    add r0, r9, #64
+; LE-NEXT:    vst1.64 {d16, d17}, [r8:128]
+; LE-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #160
+; LE-NEXT:    vmov.32 d15[1], r11
+; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #112
+; LE-NEXT:    vmov.32 d14[1], r1
+; LE-NEXT:    vst1.64 {d16, d17}, [r9:128]!
+; LE-NEXT:    vst1.64 {d14, d15}, [r9:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #128
+; LE-NEXT:    vst1.64 {d16, d17}, [r9:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vst1.64 {d16, d17}, [r9:128]
+; LE-NEXT:    add sp, sp, #192
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    add sp, sp, #4
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-NEON-LABEL: llrint_v32f128:
+; LE-NEON:       @ %bb.0:
+; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-NEON-NEXT:    .pad #4
+; LE-NEON-NEXT:    sub sp, sp, #4
+; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    .pad #192
+; LE-NEON-NEXT:    sub sp, sp, #192
+; LE-NEON-NEXT:    str r3, [sp, #60] @ 4-byte Spill
+; LE-NEON-NEXT:    add r3, sp, #688
+; LE-NEON-NEXT:    str r2, [sp, #56] @ 4-byte Spill
+; LE-NEON-NEXT:    mov r9, r0
+; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #560
+; LE-NEON-NEXT:    mov r4, r0
+; LE-NEON-NEXT:    str r1, [sp, #64] @ 4-byte Spill
+; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-NEON-NEXT:    ldr r7, [sp, #544]
+; LE-NEON-NEXT:    ldr r6, [sp, #548]
+; LE-NEON-NEXT:    add lr, sp, #96
+; LE-NEON-NEXT:    ldr r2, [sp, #552]
+; LE-NEON-NEXT:    vmov.32 d17[1], r1
+; LE-NEON-NEXT:    ldr r3, [sp, #556]
+; LE-NEON-NEXT:    mov r0, r7
+; LE-NEON-NEXT:    mov r1, r6
+; LE-NEON-NEXT:    vorr q4, q8, q8
+; LE-NEON-NEXT:    ldr r5, [sp, #528]
+; LE-NEON-NEXT:    vmov.32 d17[0], r4
+; LE-NEON-NEXT:    ldr r10, [sp, #304]
+; LE-NEON-NEXT:    ldr r8, [sp, #368]
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #532
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    mov r11, r1
+; LE-NEON-NEXT:    add lr, sp, #144
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    mov r0, r5
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #308
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    mov r0, r10
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #372
+; LE-NEON-NEXT:    mov r10, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    mov r0, r8
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #404
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #400]
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #596
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #592]
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #676
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #672]
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add lr, sp, #96
+; LE-NEON-NEXT:    vmov.32 d13[1], r4
+; LE-NEON-NEXT:    str r1, [sp, #52] @ 4-byte Spill
+; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #80
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #128
+; LE-NEON-NEXT:    vmov.32 d9[1], r7
+; LE-NEON-NEXT:    ldr r1, [sp, #628]
+; LE-NEON-NEXT:    ldr r2, [sp, #632]
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #112
+; LE-NEON-NEXT:    vmov.32 d15[1], r6
+; LE-NEON-NEXT:    ldr r3, [sp, #636]
+; LE-NEON-NEXT:    ldr r7, [sp, #64] @ 4-byte Reload
+; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    vmov.32 d11[1], r10
+; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #144
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d18[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #624]
+; LE-NEON-NEXT:    vmov.32 d16[1], r11
+; LE-NEON-NEXT:    vmov.32 d9[1], r5
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #96
+; LE-NEON-NEXT:    vmov.32 d19[1], r7
+; LE-NEON-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #512
+; LE-NEON-NEXT:    str r0, [sp, #48] @ 4-byte Spill
+; LE-NEON-NEXT:    str r1, [sp, #64] @ 4-byte Spill
+; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #768
+; LE-NEON-NEXT:    mov r11, r0
+; LE-NEON-NEXT:    str r1, [sp, #28] @ 4-byte Spill
+; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    ldr r6, [sp, #784]
+; LE-NEON-NEXT:    add r3, sp, #788
+; LE-NEON-NEXT:    mov r8, r1
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    mov r0, r6
+; LE-NEON-NEXT:    ldr r5, [sp, #736]
+; LE-NEON-NEXT:    ldr r7, [sp, #752]
+; LE-NEON-NEXT:    ldr r4, [sp, #720]
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #740
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    mov r0, r5
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #756
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    mov r0, r7
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #724
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    mov r0, r4
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    vmov.32 d13[1], r7
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    ldr r2, [sp, #296]
+; LE-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-NEON-NEXT:    ldr r3, [sp, #300]
+; LE-NEON-NEXT:    ldr r4, [sp, #576]
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
+; LE-NEON-NEXT:    ldr r10, [sp, #384]
+; LE-NEON-NEXT:    vmov.32 d15[1], r6
+; LE-NEON-NEXT:    ldr r6, [sp, #352]
+; LE-NEON-NEXT:    vmov.32 d14[1], r8
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #32
+; LE-NEON-NEXT:    vmov.32 d11[1], r1
+; LE-NEON-NEXT:    ldr r1, [sp, #60] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov.32 d8[0], r11
+; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    add r3, sp, #356
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    vmov.32 d16[0], r0
+; LE-NEON-NEXT:    mov r0, r6
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add lr, sp, #112
+; LE-NEON-NEXT:    add r3, sp, #388
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    mov r0, r10
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add lr, sp, #128
+; LE-NEON-NEXT:    add r3, sp, #580
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    mov r0, r4
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add lr, sp, #80
+; LE-NEON-NEXT:    add r3, sp, #708
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #704]
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    vmov.32 d8[1], r4
+; LE-NEON-NEXT:    add lr, sp, #80
+; LE-NEON-NEXT:    ldr r2, [sp, #52] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov.32 d12[1], r6
+; LE-NEON-NEXT:    ldr r6, [sp, #644]
+; LE-NEON-NEXT:    ldr r3, [sp, #652]
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #128
+; LE-NEON-NEXT:    vmov.32 d14[1], r7
+; LE-NEON-NEXT:    ldr r4, [sp, #480]
+; LE-NEON-NEXT:    ldr r7, [sp, #656]
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #112
+; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
+; LE-NEON-NEXT:    ldr r10, [sp, #496]
+; LE-NEON-NEXT:    vmov.32 d16[1], r5
+; LE-NEON-NEXT:    add r5, r9, #192
+; LE-NEON-NEXT:    ldr r8, [sp, #608]
+; LE-NEON-NEXT:    vmov.32 d10[1], r1
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vmov.32 d16[1], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #640]
+; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #96
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #8
+; LE-NEON-NEXT:    vmov.32 d16[1], r2
+; LE-NEON-NEXT:    ldr r2, [sp, #648]
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r5:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; LE-NEON-NEXT:    ldr r1, [sp, #48] @ 4-byte Reload
+; LE-NEON-NEXT:    vmov.32 d9[0], r1
+; LE-NEON-NEXT:    mov r1, r6
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #660
+; LE-NEON-NEXT:    mov r11, r1
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    mov r0, r7
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #484
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    mov r0, r4
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #500
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-NEON-NEXT:    mov r0, r10
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #612
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-NEON-NEXT:    mov r0, r8
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #64] @ 4-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #96
+; LE-NEON-NEXT:    add r8, r9, #128
+; LE-NEON-NEXT:    vmov.32 d13[1], r7
+; LE-NEON-NEXT:    ldr r2, [sp, #344]
+; LE-NEON-NEXT:    ldr r3, [sp, #348]
+; LE-NEON-NEXT:    vmov.32 d12[1], r11
+; LE-NEON-NEXT:    ldr r7, [sp, #452]
+; LE-NEON-NEXT:    ldr r10, [sp, #416]
+; LE-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #336]
+; LE-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #64
+; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEON-NEXT:    add lr, sp, #32
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #144
+; LE-NEON-NEXT:    vmov.32 d11[1], r4
+; LE-NEON-NEXT:    ldr r4, [sp, #340]
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]
+; LE-NEON-NEXT:    mov r1, r4
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #80
+; LE-NEON-NEXT:    vmov.32 d10[1], r6
+; LE-NEON-NEXT:    ldr r6, [sp, #448]
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    ldr r2, [sp, #456]
+; LE-NEON-NEXT:    mov r11, r1
+; LE-NEON-NEXT:    ldr r3, [sp, #460]
+; LE-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-NEON-NEXT:    mov r0, r6
+; LE-NEON-NEXT:    mov r1, r7
+; LE-NEON-NEXT:    ldr r5, [sp, #432]
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #468
+; LE-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #464]
+; LE-NEON-NEXT:    mov r6, r1
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #420
+; LE-NEON-NEXT:    mov r7, r1
+; LE-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-NEON-NEXT:    mov r0, r10
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #436
+; LE-NEON-NEXT:    mov r4, r1
+; LE-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-NEON-NEXT:    mov r0, r5
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add r3, sp, #324
+; LE-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-NEON-NEXT:    ldr r0, [sp, #320]
+; LE-NEON-NEXT:    mov r5, r1
+; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEON-NEXT:    bl llrintl
+; LE-NEON-NEXT:    add lr, sp, #64
+; LE-NEON-NEXT:    vmov.32 d9[1], r5
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #96
+; LE-NEON-NEXT:    vmov.32 d13[1], r7
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #176
+; LE-NEON-NEXT:    vmov.32 d8[1], r4
+; LE-NEON-NEXT:    vmov.32 d12[1], r6
+; LE-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-NEON-NEXT:    add r0, r9, #64
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]
+; LE-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #160
+; LE-NEON-NEXT:    vmov.32 d15[1], r11
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #112
+; LE-NEON-NEXT:    vmov.32 d14[1], r1
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]!
+; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r9:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    add lr, sp, #128
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]!
+; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]
+; LE-NEON-NEXT:    add sp, sp, #192
+; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEON-NEXT:    add sp, sp, #4
+; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-LABEL: llrint_v32f128:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEXT:    .pad #4
+; BE-NEXT:    sub sp, sp, #4
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    .pad #152
+; BE-NEXT:    sub sp, sp, #152
+; BE-NEXT:    str r3, [sp, #120] @ 4-byte Spill
+; BE-NEXT:    add r3, sp, #712
+; BE-NEXT:    str r2, [sp, #112] @ 4-byte Spill
+; BE-NEXT:    mov r9, r0
+; BE-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    ldr r7, [sp, #648]
+; BE-NEXT:    add r3, sp, #652
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    mov r0, r7
+; BE-NEXT:    ldr r6, [sp, #520]
+; BE-NEXT:    ldr r8, [sp, #632]
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #524
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    mov r0, r6
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #636
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    mov r0, r8
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    ldr r0, [sp, #488]
+; BE-NEXT:    vmov.32 d8[1], r4
+; BE-NEXT:    ldr r1, [sp, #492]
+; BE-NEXT:    ldr r2, [sp, #496]
+; BE-NEXT:    vmov.32 d10[1], r7
+; BE-NEXT:    ldr r3, [sp, #500]
+; BE-NEXT:    vmov.32 d9[1], r5
+; BE-NEXT:    vstr d8, [sp, #144] @ 8-byte Spill
+; BE-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
+; BE-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #680
+; BE-NEXT:    str r0, [sp, #104] @ 4-byte Spill
+; BE-NEXT:    str r1, [sp, #88] @ 4-byte Spill
+; BE-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    ldr r0, [sp, #728]
+; BE-NEXT:    ldr r2, [sp, #736]
+; BE-NEXT:    vmov.32 d11[1], r6
+; BE-NEXT:    ldr r6, [sp, #732]
+; BE-NEXT:    ldr r3, [sp, #740]
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    ldr r5, [sp, #504]
+; BE-NEXT:    mov r1, r6
+; BE-NEXT:    ldr r7, [sp, #744]
+; BE-NEXT:    ldr r4, [sp, #748]
+; BE-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
+; BE-NEXT:    vstr d16, [sp, #8] @ 8-byte Spill
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    ldr r2, [sp, #752]
+; BE-NEXT:    mov r11, r1
+; BE-NEXT:    ldr r3, [sp, #756]
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    mov r0, r7
+; BE-NEXT:    mov r1, r4
+; BE-NEXT:    ldr r10, [sp, #552]
+; BE-NEXT:    ldr r6, [sp, #664]
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #508
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    mov r0, r5
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #540
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    ldr r0, [sp, #536]
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #556
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    mov r0, r10
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #668
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    mov r0, r6
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #700
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    ldr r0, [sp, #696]
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
+; BE-NEXT:    ldr r2, [sp, #256]
+; BE-NEXT:    vmov.32 d13[1], r11
+; BE-NEXT:    ldr r3, [sp, #260]
+; BE-NEXT:    vmov.32 d14[1], r6
+; BE-NEXT:    ldr r6, [sp, #264]
+; BE-NEXT:    vmov.32 d9[1], r4
+; BE-NEXT:    ldr r4, [sp, #344]
+; BE-NEXT:    vmov.32 d12[1], r5
+; BE-NEXT:    ldr r5, [sp, #312]
+; BE-NEXT:    vmov.32 d8[1], r8
+; BE-NEXT:    ldr r8, [sp, #328]
+; BE-NEXT:    vmov.32 d10[1], r7
+; BE-NEXT:    vstr d13, [sp, #32] @ 8-byte Spill
+; BE-NEXT:    vmov.32 d11[1], r1
+; BE-NEXT:    ldr r1, [sp, #120] @ 4-byte Reload
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
+; BE-NEXT:    vstr d14, [sp] @ 8-byte Spill
+; BE-NEXT:    vstr d9, [sp, #16] @ 8-byte Spill
+; BE-NEXT:    vstr d12, [sp, #56] @ 8-byte Spill
+; BE-NEXT:    vstr d10, [sp, #64] @ 8-byte Spill
+; BE-NEXT:    vstr d8, [sp, #40] @ 8-byte Spill
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #268
+; BE-NEXT:    mov r11, r1
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    mov r0, r6
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #316
+; BE-NEXT:    mov r10, r1
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    mov r0, r5
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #332
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    mov r0, r8
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #348
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    mov r0, r4
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #364
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    ldr r0, [sp, #360]
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #476
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    ldr r0, [sp, #472]
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; BE-NEXT:    ldr r2, [sp, #592]
+; BE-NEXT:    vldr d20, [sp, #136] @ 8-byte Reload
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    ldr r1, [sp, #588]
+; BE-NEXT:    ldr r3, [sp, #596]
+; BE-NEXT:    vldr d22, [sp, #24] @ 8-byte Reload
+; BE-NEXT:    vldr d18, [sp, #8] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d21, d20
+; BE-NEXT:    vmov.32 d10[1], r6
+; BE-NEXT:    ldr r6, [sp, #600]
+; BE-NEXT:    vmov.32 d9[1], r4
+; BE-NEXT:    ldr r4, [sp, #616]
+; BE-NEXT:    vmov.32 d12[1], r7
+; BE-NEXT:    ldr r7, [sp, #604]
+; BE-NEXT:    vmov.32 d8[1], r10
+; BE-NEXT:    add r10, r9, #192
+; BE-NEXT:    vmov.32 d14[1], r11
+; BE-NEXT:    ldr r11, [sp, #440]
+; BE-NEXT:    vmov.32 d13[1], r0
+; BE-NEXT:    ldr r0, [sp, #584]
+; BE-NEXT:    vmov.32 d15[1], r5
+; BE-NEXT:    vstr d16, [sp, #48] @ 8-byte Spill
+; BE-NEXT:    vldr d16, [sp, #128] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d20, d22
+; BE-NEXT:    vldr d22, [sp] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d19, d18
+; BE-NEXT:    vrev64.32 d17, d16
+; BE-NEXT:    vrev64.32 d18, d22
+; BE-NEXT:    vstr d10, [sp, #120] @ 8-byte Spill
+; BE-NEXT:    vstr d9, [sp, #112] @ 8-byte Spill
+; BE-NEXT:    vstr d15, [sp, #104] @ 8-byte Spill
+; BE-NEXT:    vstr d12, [sp, #96] @ 8-byte Spill
+; BE-NEXT:    vstr d8, [sp, #80] @ 8-byte Spill
+; BE-NEXT:    vstr d14, [sp, #72] @ 8-byte Spill
+; BE-NEXT:    vstr d13, [sp, #88] @ 8-byte Spill
+; BE-NEXT:    vst1.64 {d20, d21}, [r10:128]!
+; BE-NEXT:    vrev64.32 d16, d11
+; BE-NEXT:    vst1.64 {d18, d19}, [r10:128]!
+; BE-NEXT:    vst1.64 {d16, d17}, [r10:128]!
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    ldr r2, [sp, #608]
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    ldr r3, [sp, #612]
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    mov r0, r6
+; BE-NEXT:    mov r1, r7
+; BE-NEXT:    ldr r5, [sp, #456]
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #620
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    mov r0, r4
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #444
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    mov r0, r11
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #460
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    mov r0, r5
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #572
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    ldr r0, [sp, #568]
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    vldr d16, [sp, #16] @ 8-byte Reload
+; BE-NEXT:    vldr d18, [sp, #56] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d17, d16
+; BE-NEXT:    ldr r2, [sp, #304]
+; BE-NEXT:    vrev64.32 d16, d18
+; BE-NEXT:    ldr r3, [sp, #308]
+; BE-NEXT:    vldr d18, [sp, #144] @ 8-byte Reload
+; BE-NEXT:    vldr d20, [sp, #64] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d19, d18
+; BE-NEXT:    vrev64.32 d18, d20
+; BE-NEXT:    vldr d20, [sp, #40] @ 8-byte Reload
+; BE-NEXT:    vldr d22, [sp, #32] @ 8-byte Reload
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    ldr r0, [sp, #296]
+; BE-NEXT:    vmov.32 d10[1], r7
+; BE-NEXT:    ldr r7, [sp, #412]
+; BE-NEXT:    vmov.32 d9[1], r6
+; BE-NEXT:    ldr r6, [sp, #408]
+; BE-NEXT:    vmov.32 d8[1], r8
+; BE-NEXT:    add r8, r9, #128
+; BE-NEXT:    vrev64.32 d21, d20
+; BE-NEXT:    vmov.32 d13[1], r5
+; BE-NEXT:    ldr r5, [sp, #300]
+; BE-NEXT:    vrev64.32 d20, d22
+; BE-NEXT:    vmov.32 d14[1], r1
+; BE-NEXT:    mov r1, r5
+; BE-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
+; BE-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
+; BE-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
+; BE-NEXT:    vst1.64 {d20, d21}, [r10:128]
+; BE-NEXT:    vst1.64 {d18, d19}, [r8:128]!
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    ldr r4, [sp, #424]
+; BE-NEXT:    ldr r10, [sp, #376]
+; BE-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    ldr r2, [sp, #416]
+; BE-NEXT:    mov r11, r1
+; BE-NEXT:    ldr r3, [sp, #420]
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    mov r0, r6
+; BE-NEXT:    mov r1, r7
+; BE-NEXT:    ldr r5, [sp, #392]
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #428
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    mov r0, r4
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #380
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    mov r0, r10
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #396
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    mov r0, r5
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    add r3, sp, #284
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    ldr r0, [sp, #280]
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    bl llrintl
+; BE-NEXT:    vldr d16, [sp, #120] @ 8-byte Reload
+; BE-NEXT:    vldr d18, [sp, #112] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d17, d16
+; BE-NEXT:    vldr d26, [sp, #136] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d16, d18
+; BE-NEXT:    vldr d18, [sp, #104] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d31, d26
+; BE-NEXT:    vldr d26, [sp, #128] @ 8-byte Reload
+; BE-NEXT:    vldr d20, [sp, #96] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d19, d18
+; BE-NEXT:    vrev64.32 d18, d20
+; BE-NEXT:    vldr d20, [sp, #80] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d30, d26
+; BE-NEXT:    vldr d26, [sp, #24] @ 8-byte Reload
+; BE-NEXT:    vmov.32 d10[1], r5
+; BE-NEXT:    vldr d22, [sp, #72] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d21, d20
+; BE-NEXT:    vrev64.32 d1, d26
+; BE-NEXT:    vmov.32 d9[1], r7
+; BE-NEXT:    vmov.32 d12[1], r4
+; BE-NEXT:    vrev64.32 d20, d22
+; BE-NEXT:    vldr d22, [sp, #88] @ 8-byte Reload
+; BE-NEXT:    vmov.32 d8[1], r6
+; BE-NEXT:    vrev64.32 d0, d14
+; BE-NEXT:    vmov.32 d28[0], r0
+; BE-NEXT:    add r0, r9, #64
+; BE-NEXT:    vrev64.32 d3, d10
+; BE-NEXT:    vldr d24, [sp, #48] @ 8-byte Reload
+; BE-NEXT:    vrev64.32 d23, d22
+; BE-NEXT:    vrev64.32 d5, d9
+; BE-NEXT:    vst1.64 {d0, d1}, [r8:128]!
+; BE-NEXT:    vrev64.32 d2, d12
+; BE-NEXT:    vmov.32 d15[1], r11
+; BE-NEXT:    vrev64.32 d22, d24
+; BE-NEXT:    vrev64.32 d25, d13
+; BE-NEXT:    vrev64.32 d4, d8
+; BE-NEXT:    vst1.64 {d30, d31}, [r8:128]
+; BE-NEXT:    vst1.64 {d2, d3}, [r0:128]!
+; BE-NEXT:    vmov.32 d28[1], r1
+; BE-NEXT:    vrev64.32 d24, d11
+; BE-NEXT:    vst1.64 {d4, d5}, [r0:128]!
+; BE-NEXT:    vrev64.32 d27, d15
+; BE-NEXT:    vst1.64 {d24, d25}, [r0:128]!
+; BE-NEXT:    vrev64.32 d26, d28
+; BE-NEXT:    vst1.64 {d22, d23}, [r0:128]
+; BE-NEXT:    vst1.64 {d20, d21}, [r9:128]!
+; BE-NEXT:    vst1.64 {d26, d27}, [r9:128]!
+; BE-NEXT:    vst1.64 {d18, d19}, [r9:128]!
+; BE-NEXT:    vst1.64 {d16, d17}, [r9:128]
+; BE-NEXT:    add sp, sp, #152
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    add sp, sp, #4
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-NEON-LABEL: llrint_v32f128:
+; BE-NEON:       @ %bb.0:
+; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-NEON-NEXT:    .pad #4
+; BE-NEON-NEXT:    sub sp, sp, #4
+; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    .pad #152
+; BE-NEON-NEXT:    sub sp, sp, #152
+; BE-NEON-NEXT:    str r3, [sp, #120] @ 4-byte Spill
+; BE-NEON-NEXT:    add r3, sp, #712
+; BE-NEON-NEXT:    str r2, [sp, #112] @ 4-byte Spill
+; BE-NEON-NEXT:    mov r9, r0
+; BE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    ldr r7, [sp, #648]
+; BE-NEON-NEXT:    add r3, sp, #652
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    mov r0, r7
+; BE-NEON-NEXT:    ldr r6, [sp, #520]
+; BE-NEON-NEXT:    ldr r8, [sp, #632]
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #524
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    mov r0, r6
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #636
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    mov r0, r8
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #488]
+; BE-NEON-NEXT:    vmov.32 d8[1], r4
+; BE-NEON-NEXT:    ldr r1, [sp, #492]
+; BE-NEON-NEXT:    ldr r2, [sp, #496]
+; BE-NEON-NEXT:    vmov.32 d10[1], r7
+; BE-NEON-NEXT:    ldr r3, [sp, #500]
+; BE-NEON-NEXT:    vmov.32 d9[1], r5
+; BE-NEON-NEXT:    vstr d8, [sp, #144] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #680
+; BE-NEON-NEXT:    str r0, [sp, #104] @ 4-byte Spill
+; BE-NEON-NEXT:    str r1, [sp, #88] @ 4-byte Spill
+; BE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #728]
+; BE-NEON-NEXT:    ldr r2, [sp, #736]
+; BE-NEON-NEXT:    vmov.32 d11[1], r6
+; BE-NEON-NEXT:    ldr r6, [sp, #732]
+; BE-NEON-NEXT:    ldr r3, [sp, #740]
+; BE-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-NEON-NEXT:    ldr r5, [sp, #504]
+; BE-NEON-NEXT:    mov r1, r6
+; BE-NEON-NEXT:    ldr r7, [sp, #744]
+; BE-NEON-NEXT:    ldr r4, [sp, #748]
+; BE-NEON-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d16, [sp, #8] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    ldr r2, [sp, #752]
+; BE-NEON-NEXT:    mov r11, r1
+; BE-NEON-NEXT:    ldr r3, [sp, #756]
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    mov r0, r7
+; BE-NEON-NEXT:    mov r1, r4
+; BE-NEON-NEXT:    ldr r10, [sp, #552]
+; BE-NEON-NEXT:    ldr r6, [sp, #664]
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #508
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    mov r0, r5
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #540
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #536]
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #556
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    mov r0, r10
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #668
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    mov r0, r6
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #700
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #696]
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
+; BE-NEON-NEXT:    ldr r2, [sp, #256]
+; BE-NEON-NEXT:    vmov.32 d13[1], r11
+; BE-NEON-NEXT:    ldr r3, [sp, #260]
+; BE-NEON-NEXT:    vmov.32 d14[1], r6
+; BE-NEON-NEXT:    ldr r6, [sp, #264]
+; BE-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-NEON-NEXT:    ldr r4, [sp, #344]
+; BE-NEON-NEXT:    vmov.32 d12[1], r5
+; BE-NEON-NEXT:    ldr r5, [sp, #312]
+; BE-NEON-NEXT:    vmov.32 d8[1], r8
+; BE-NEON-NEXT:    ldr r8, [sp, #328]
+; BE-NEON-NEXT:    vmov.32 d10[1], r7
+; BE-NEON-NEXT:    vstr d13, [sp, #32] @ 8-byte Spill
+; BE-NEON-NEXT:    vmov.32 d11[1], r1
+; BE-NEON-NEXT:    ldr r1, [sp, #120] @ 4-byte Reload
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
+; BE-NEON-NEXT:    vstr d14, [sp] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d9, [sp, #16] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d12, [sp, #56] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d10, [sp, #64] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d8, [sp, #40] @ 8-byte Spill
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #268
+; BE-NEON-NEXT:    mov r11, r1
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    mov r0, r6
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #316
+; BE-NEON-NEXT:    mov r10, r1
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    mov r0, r5
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #332
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    mov r0, r8
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #348
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    mov r0, r4
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #364
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #360]
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #476
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #472]
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; BE-NEON-NEXT:    ldr r2, [sp, #592]
+; BE-NEON-NEXT:    vldr d20, [sp, #136] @ 8-byte Reload
+; BE-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-NEON-NEXT:    ldr r1, [sp, #588]
+; BE-NEON-NEXT:    ldr r3, [sp, #596]
+; BE-NEON-NEXT:    vldr d22, [sp, #24] @ 8-byte Reload
+; BE-NEON-NEXT:    vldr d18, [sp, #8] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d21, d20
+; BE-NEON-NEXT:    vmov.32 d10[1], r6
+; BE-NEON-NEXT:    ldr r6, [sp, #600]
+; BE-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-NEON-NEXT:    ldr r4, [sp, #616]
+; BE-NEON-NEXT:    vmov.32 d12[1], r7
+; BE-NEON-NEXT:    ldr r7, [sp, #604]
+; BE-NEON-NEXT:    vmov.32 d8[1], r10
+; BE-NEON-NEXT:    add r10, r9, #192
+; BE-NEON-NEXT:    vmov.32 d14[1], r11
+; BE-NEON-NEXT:    ldr r11, [sp, #440]
+; BE-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #584]
+; BE-NEON-NEXT:    vmov.32 d15[1], r5
+; BE-NEON-NEXT:    vstr d16, [sp, #48] @ 8-byte Spill
+; BE-NEON-NEXT:    vldr d16, [sp, #128] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d20, d22
+; BE-NEON-NEXT:    vldr d22, [sp] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d19, d18
+; BE-NEON-NEXT:    vrev64.32 d17, d16
+; BE-NEON-NEXT:    vrev64.32 d18, d22
+; BE-NEON-NEXT:    vstr d10, [sp, #120] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d9, [sp, #112] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d15, [sp, #104] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d12, [sp, #96] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d8, [sp, #80] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d14, [sp, #72] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d13, [sp, #88] @ 8-byte Spill
+; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r10:128]!
+; BE-NEON-NEXT:    vrev64.32 d16, d11
+; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r10:128]!
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]!
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    ldr r2, [sp, #608]
+; BE-NEON-NEXT:    mov r8, r1
+; BE-NEON-NEXT:    ldr r3, [sp, #612]
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    mov r0, r6
+; BE-NEON-NEXT:    mov r1, r7
+; BE-NEON-NEXT:    ldr r5, [sp, #456]
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #620
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    mov r0, r4
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #444
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    mov r0, r11
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #460
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-NEON-NEXT:    mov r0, r5
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #572
+; BE-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #568]
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    vldr d16, [sp, #16] @ 8-byte Reload
+; BE-NEON-NEXT:    vldr d18, [sp, #56] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d17, d16
+; BE-NEON-NEXT:    ldr r2, [sp, #304]
+; BE-NEON-NEXT:    vrev64.32 d16, d18
+; BE-NEON-NEXT:    ldr r3, [sp, #308]
+; BE-NEON-NEXT:    vldr d18, [sp, #144] @ 8-byte Reload
+; BE-NEON-NEXT:    vldr d20, [sp, #64] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d19, d18
+; BE-NEON-NEXT:    vrev64.32 d18, d20
+; BE-NEON-NEXT:    vldr d20, [sp, #40] @ 8-byte Reload
+; BE-NEON-NEXT:    vldr d22, [sp, #32] @ 8-byte Reload
+; BE-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #296]
+; BE-NEON-NEXT:    vmov.32 d10[1], r7
+; BE-NEON-NEXT:    ldr r7, [sp, #412]
+; BE-NEON-NEXT:    vmov.32 d9[1], r6
+; BE-NEON-NEXT:    ldr r6, [sp, #408]
+; BE-NEON-NEXT:    vmov.32 d8[1], r8
+; BE-NEON-NEXT:    add r8, r9, #128
+; BE-NEON-NEXT:    vrev64.32 d21, d20
+; BE-NEON-NEXT:    vmov.32 d13[1], r5
+; BE-NEON-NEXT:    ldr r5, [sp, #300]
+; BE-NEON-NEXT:    vrev64.32 d20, d22
+; BE-NEON-NEXT:    vmov.32 d14[1], r1
+; BE-NEON-NEXT:    mov r1, r5
+; BE-NEON-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
+; BE-NEON-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
+; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r10:128]
+; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r8:128]!
+; BE-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-NEON-NEXT:    ldr r4, [sp, #424]
+; BE-NEON-NEXT:    ldr r10, [sp, #376]
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    ldr r2, [sp, #416]
+; BE-NEON-NEXT:    mov r11, r1
+; BE-NEON-NEXT:    ldr r3, [sp, #420]
+; BE-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-NEON-NEXT:    mov r0, r6
+; BE-NEON-NEXT:    mov r1, r7
+; BE-NEON-NEXT:    ldr r5, [sp, #392]
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #428
+; BE-NEON-NEXT:    mov r6, r1
+; BE-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-NEON-NEXT:    mov r0, r4
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #380
+; BE-NEON-NEXT:    mov r7, r1
+; BE-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-NEON-NEXT:    mov r0, r10
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #396
+; BE-NEON-NEXT:    mov r4, r1
+; BE-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-NEON-NEXT:    mov r0, r5
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    add r3, sp, #284
+; BE-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-NEON-NEXT:    ldr r0, [sp, #280]
+; BE-NEON-NEXT:    mov r5, r1
+; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEON-NEXT:    bl llrintl
+; BE-NEON-NEXT:    vldr d16, [sp, #120] @ 8-byte Reload
+; BE-NEON-NEXT:    vldr d18, [sp, #112] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d17, d16
+; BE-NEON-NEXT:    vldr d26, [sp, #136] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d16, d18
+; BE-NEON-NEXT:    vldr d18, [sp, #104] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d31, d26
+; BE-NEON-NEXT:    vldr d26, [sp, #128] @ 8-byte Reload
+; BE-NEON-NEXT:    vldr d20, [sp, #96] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d19, d18
+; BE-NEON-NEXT:    vrev64.32 d18, d20
+; BE-NEON-NEXT:    vldr d20, [sp, #80] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d30, d26
+; BE-NEON-NEXT:    vldr d26, [sp, #24] @ 8-byte Reload
+; BE-NEON-NEXT:    vmov.32 d10[1], r5
+; BE-NEON-NEXT:    vldr d22, [sp, #72] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d21, d20
+; BE-NEON-NEXT:    vrev64.32 d1, d26
+; BE-NEON-NEXT:    vmov.32 d9[1], r7
+; BE-NEON-NEXT:    vmov.32 d12[1], r4
+; BE-NEON-NEXT:    vrev64.32 d20, d22
+; BE-NEON-NEXT:    vldr d22, [sp, #88] @ 8-byte Reload
+; BE-NEON-NEXT:    vmov.32 d8[1], r6
+; BE-NEON-NEXT:    vrev64.32 d0, d14
+; BE-NEON-NEXT:    vmov.32 d28[0], r0
+; BE-NEON-NEXT:    add r0, r9, #64
+; BE-NEON-NEXT:    vrev64.32 d3, d10
+; BE-NEON-NEXT:    vldr d24, [sp, #48] @ 8-byte Reload
+; BE-NEON-NEXT:    vrev64.32 d23, d22
+; BE-NEON-NEXT:    vrev64.32 d5, d9
+; BE-NEON-NEXT:    vst1.64 {d0, d1}, [r8:128]!
+; BE-NEON-NEXT:    vrev64.32 d2, d12
+; BE-NEON-NEXT:    vmov.32 d15[1], r11
+; BE-NEON-NEXT:    vrev64.32 d22, d24
+; BE-NEON-NEXT:    vrev64.32 d25, d13
+; BE-NEON-NEXT:    vrev64.32 d4, d8
+; BE-NEON-NEXT:    vst1.64 {d30, d31}, [r8:128]
+; BE-NEON-NEXT:    vst1.64 {d2, d3}, [r0:128]!
+; BE-NEON-NEXT:    vmov.32 d28[1], r1
+; BE-NEON-NEXT:    vrev64.32 d24, d11
+; BE-NEON-NEXT:    vst1.64 {d4, d5}, [r0:128]!
+; BE-NEON-NEXT:    vrev64.32 d27, d15
+; BE-NEON-NEXT:    vst1.64 {d24, d25}, [r0:128]!
+; BE-NEON-NEXT:    vrev64.32 d26, d28
+; BE-NEON-NEXT:    vst1.64 {d22, d23}, [r0:128]
+; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r9:128]!
+; BE-NEON-NEXT:    vst1.64 {d26, d27}, [r9:128]!
+; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r9:128]!
+; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]
+; BE-NEON-NEXT:    add sp, sp, #152
+; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEON-NEXT:    add sp, sp, #4
+; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <32 x i64> @llvm.llrint.v32i64.v32f128(<32 x fp128> %x)
+  ret <32 x i64> %a
+}
+declare <32 x i64> @llvm.llrint.v32i64.v32f128(<32 x fp128>)
diff --git a/llvm/test/CodeGen/ARM/vector-lrint.ll b/llvm/test/CodeGen/ARM/vector-lrint.ll
new file mode 100644
index 0000000000000..50c8b9ff6d913
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/vector-lrint.ll
@@ -0,0 +1,13251 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf                | FileCheck %s --check-prefix=LE-I32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf                | FileCheck %s --check-prefix=LE-I64
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon   | FileCheck %s --check-prefix=LE-I32-NEON
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon   | FileCheck %s --check-prefix=LE-I64-NEON
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf              | FileCheck %s --check-prefix=BE-I32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf              | FileCheck %s --check-prefix=BE-I64
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-I32-NEON
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-I64-NEON
+
+; FIXME: crash "Do not know how to soft promote this operator's operand!"
+; define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+;   %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
+;   ret <1 x iXLen> %a
+; }
+; declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
+
+; define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+;   %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
+;   ret <2 x iXLen> %a
+; }
+; declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
+
+; define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+;   %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
+;   ret <4 x iXLen> %a
+; }
+; declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>)
+
+; define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
+;   %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
+;   ret <8 x iXLen> %a
+; }
+; declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>)
+
+; define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
+;   %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
+;   ret <16 x iXLen> %a
+; }
+; declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
+
+; define <32 x iXLen> @lrint_v32f16(<32 x half> %x) {
+;   %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half> %x)
+;   ret <32 x iXLen> %a
+; }
+; declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>)
+
+define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
+; LE-I32-LABEL: lrint_v1f32:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v1f32:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r11, lr}
+; LE-I64-NEXT:    push {r11, lr}
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d0[0], r0
+; LE-I64-NEXT:    vmov.32 d0[1], r1
+; LE-I64-NEXT:    pop {r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v1f32:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r11, lr}
+; LE-I32-NEON-NEXT:    push {r11, lr}
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v1f32:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r11, lr}
+; LE-I64-NEON-NEXT:    push {r11, lr}
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.32 d0[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d0[1], r1
+; LE-I64-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I32-LABEL: lrint_v1f32:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v1f32:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r11, lr}
+; BE-I64-NEXT:    push {r11, lr}
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEXT:    pop {r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v1f32:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r11, lr}
+; BE-I32-NEON-NEXT:    push {r11, lr}
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v1f32:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r11, lr}
+; BE-I64-NEON-NEXT:    push {r11, lr}
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEON-NEXT:    pop {r11, pc}
+  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x) ; lowers to a single lrintf libcall per the generated lines above
+  ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>) ; iXLen is rewritten to i32 or i64 by the sed in the RUN lines
+
+define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
+; LE-I32-LABEL: lrint_v2f32:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    .vsave {d8, d9}
+; LE-I32-NEXT:    vpush {d8, d9}
+; LE-I32-NEXT:    vmov.f64 d8, d0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s17
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEXT:    vorr d0, d9, d9
+; LE-I32-NEXT:    vpop {d8, d9}
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v2f32:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, lr}
+; LE-I64-NEXT:    push {r4, lr}
+; LE-I64-NEXT:    .vsave {d10, d11}
+; LE-I64-NEXT:    vpush {d10, d11}
+; LE-I64-NEXT:    .vsave {d8}
+; LE-I64-NEXT:    vpush {d8}
+; LE-I64-NEXT:    vmov.f64 d8, d0
+; LE-I64-NEXT:    vmov.f32 s0, s17
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s16
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    vmov.32 d11[1], r4
+; LE-I64-NEXT:    vmov.32 d10[1], r1
+; LE-I64-NEXT:    vorr q0, q5, q5
+; LE-I64-NEXT:    vpop {d8}
+; LE-I64-NEXT:    vpop {d10, d11}
+; LE-I64-NEXT:    pop {r4, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v2f32:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r11, lr}
+; LE-I32-NEON-NEXT:    push {r11, lr}
+; LE-I32-NEON-NEXT:    .vsave {d8, d9}
+; LE-I32-NEON-NEXT:    vpush {d8, d9}
+; LE-I32-NEON-NEXT:    vmov.f64 d8, d0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s17
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT:    vorr d0, d9, d9
+; LE-I32-NEON-NEXT:    vpop {d8, d9}
+; LE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v2f32:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, lr}
+; LE-I64-NEON-NEXT:    push {r4, lr}
+; LE-I64-NEON-NEXT:    .vsave {d10, d11}
+; LE-I64-NEON-NEXT:    vpush {d10, d11}
+; LE-I64-NEON-NEXT:    .vsave {d8}
+; LE-I64-NEON-NEXT:    vpush {d8}
+; LE-I64-NEON-NEXT:    vmov.f64 d8, d0
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r1
+; LE-I64-NEON-NEXT:    vorr q0, q5, q5
+; LE-I64-NEON-NEXT:    vpop {d8}
+; LE-I64-NEON-NEXT:    vpop {d10, d11}
+; LE-I64-NEON-NEXT:    pop {r4, pc}
+;
+; BE-I32-LABEL: lrint_v2f32:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    .vsave {d8, d9}
+; BE-I32-NEXT:    vpush {d8, d9}
+; BE-I32-NEXT:    vrev64.32 d8, d0
+; BE-I32-NEXT:    vmov.f32 s0, s16
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s17
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEXT:    vrev64.32 d0, d9
+; BE-I32-NEXT:    vpop {d8, d9}
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v2f32:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, lr}
+; BE-I64-NEXT:    push {r4, lr}
+; BE-I64-NEXT:    .vsave {d10, d11}
+; BE-I64-NEXT:    vpush {d10, d11}
+; BE-I64-NEXT:    .vsave {d8}
+; BE-I64-NEXT:    vpush {d8}
+; BE-I64-NEXT:    vrev64.32 d8, d0
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEXT:    vrev64.32 q0, q5
+; BE-I64-NEXT:    vpop {d8}
+; BE-I64-NEXT:    vpop {d10, d11}
+; BE-I64-NEXT:    pop {r4, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v2f32:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r11, lr}
+; BE-I32-NEON-NEXT:    push {r11, lr}
+; BE-I32-NEON-NEXT:    .vsave {d8, d9}
+; BE-I32-NEON-NEXT:    vpush {d8, d9}
+; BE-I32-NEON-NEXT:    vrev64.32 d8, d0
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT:    vrev64.32 d0, d9
+; BE-I32-NEON-NEXT:    vpop {d8, d9}
+; BE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v2f32:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, lr}
+; BE-I64-NEON-NEXT:    push {r4, lr}
+; BE-I64-NEON-NEXT:    .vsave {d10, d11}
+; BE-I64-NEON-NEXT:    vpush {d10, d11}
+; BE-I64-NEON-NEXT:    .vsave {d8}
+; BE-I64-NEON-NEXT:    vpush {d8}
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d0
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q0, q5
+; BE-I64-NEON-NEXT:    vpop {d8}
+; BE-I64-NEON-NEXT:    vpop {d10, d11}
+; BE-I64-NEON-NEXT:    pop {r4, pc}
+  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x) ; scalarized: one lrintf libcall per lane, per the generated lines above
+  ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>) ; iXLen is rewritten to i32 or i64 by the sed in the RUN lines
+
+define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
+; LE-I32-LABEL: lrint_v4f32:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11}
+; LE-I32-NEXT:    vorr q4, q0, q0
+; LE-I32-NEXT:    vmov.f32 s0, s18
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s16
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s19
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s17
+; LE-I32-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    vorr q0, q5, q5
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11}
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v4f32:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, lr}
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; LE-I64-NEXT:    vorr q5, q0, q0
+; LE-I64-NEXT:    vmov.f32 s0, s23
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s20
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s21
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s22
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vmov.32 d13[1], r6
+; LE-I64-NEXT:    vmov.32 d9[1], r4
+; LE-I64-NEXT:    vmov.32 d12[1], r5
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vorr q0, q6, q6
+; LE-I64-NEXT:    vorr q1, q4, q4
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; LE-I64-NEXT:    pop {r4, r5, r6, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v4f32:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r11, lr}
+; LE-I32-NEON-NEXT:    push {r11, lr}
+; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11}
+; LE-I32-NEON-NEXT:    vorr q4, q0, q0
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s18
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s16
+; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s19
+; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s17
+; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT:    vorr q0, q5, q5
+; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11}
+; LE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v4f32:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, lr}
+; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; LE-I64-NEON-NEXT:    vorr q5, q0, q0
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s23
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s20
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s21
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s22
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r6
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r4
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT:    vorr q0, q6, q6
+; LE-I64-NEON-NEXT:    vorr q1, q4, q4
+; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-I32-LABEL: lrint_v4f32:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11}
+; BE-I32-NEXT:    vrev64.32 q4, q0
+; BE-I32-NEXT:    vmov.f32 s0, s18
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s16
+; BE-I32-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s19
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s17
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q5
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11}
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v4f32:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, lr}
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; BE-I64-NEXT:    vrev64.32 d8, d1
+; BE-I64-NEXT:    vrev64.32 d9, d0
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s18
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s19
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    vmov.32 d13[1], r6
+; BE-I64-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEXT:    vmov.32 d12[1], r5
+; BE-I64-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEXT:    vrev64.32 q0, q6
+; BE-I64-NEXT:    vrev64.32 q1, q5
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; BE-I64-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v4f32:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r11, lr}
+; BE-I32-NEON-NEXT:    push {r11, lr}
+; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11}
+; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11}
+; BE-I32-NEON-NEXT:    vrev64.32 q4, q0
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s18
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s19
+; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT:    vrev64.32 q0, q5
+; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11}
+; BE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v4f32:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, lr}
+; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d1
+; BE-I64-NEON-NEXT:    vrev64.32 d9, d0
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s18
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s19
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r6
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r5
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q0, q6
+; BE-I64-NEON-NEXT:    vrev64.32 q1, q5
+; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, pc}
+  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x) ; scalarized: one lrintf libcall per lane, per the generated lines above
+  ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>) ; iXLen is rewritten to i32 or i64 by the sed in the RUN lines
+
+define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
+; LE-I32-LABEL: lrint_v8f32:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    vorr q5, q1, q1
+; LE-I32-NEXT:    vorr q7, q0, q0
+; LE-I32-NEXT:    vmov.f32 s0, s20
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s22
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s30
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s28
+; LE-I32-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s31
+; LE-I32-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s29
+; LE-I32-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s23
+; LE-I32-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s21
+; LE-I32-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    vorr q0, q6, q6
+; LE-I32-NEXT:    vorr q1, q4, q4
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v8f32:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    .pad #40
+; LE-I64-NEXT:    sub sp, sp, #40
+; LE-I64-NEXT:    vorr q6, q1, q1
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vorr q7, q0, q0
+; LE-I64-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-I64-NEXT:    vmov.f32 s0, s27
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s25
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vorr q6, q7, q7
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    vmov.f32 s0, s26
+; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s27
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.f32 s0, s1
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.f32 s0, s2
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vmov.32 d13[1], r6
+; LE-I64-NEXT:    vmov.32 d15[1], r4
+; LE-I64-NEXT:    vmov.32 d11[1], r10
+; LE-I64-NEXT:    vmov.32 d9[1], r8
+; LE-I64-NEXT:    vmov.32 d12[1], r5
+; LE-I64-NEXT:    vmov.32 d14[1], r7
+; LE-I64-NEXT:    vorr q0, q6, q6
+; LE-I64-NEXT:    vmov.32 d10[1], r9
+; LE-I64-NEXT:    vorr q1, q7, q7
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vorr q2, q5, q5
+; LE-I64-NEXT:    vorr q3, q4, q4
+; LE-I64-NEXT:    add sp, sp, #40
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v8f32:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r11, lr}
+; LE-I32-NEON-NEXT:    push {r11, lr}
+; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    vorr q5, q1, q1
+; LE-I32-NEON-NEXT:    vorr q7, q0, q0
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s20
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s22
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s30
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s28
+; LE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s31
+; LE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s29
+; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s23
+; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s21
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT:    vorr q0, q6, q6
+; LE-I32-NEON-NEXT:    vorr q1, q4, q4
+; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v8f32:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    .pad #40
+; LE-I64-NEON-NEXT:    sub sp, sp, #40
+; LE-I64-NEON-NEXT:    vorr q6, q1, q1
+; LE-I64-NEON-NEXT:    add lr, sp, #24
+; LE-I64-NEON-NEXT:    vorr q7, q0, q0
+; LE-I64-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s27
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEON-NEXT:    mov r8, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s25
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vorr q6, q7, q7
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    mov r10, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s26
+; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s27
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s1
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #24
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s2
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r6
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r4
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r10
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r8
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r7
+; LE-I64-NEON-NEXT:    vorr q0, q6, q6
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r9
+; LE-I64-NEON-NEXT:    vorr q1, q7, q7
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT:    vorr q2, q5, q5
+; LE-I64-NEON-NEXT:    vorr q3, q4, q4
+; LE-I64-NEON-NEXT:    add sp, sp, #40
+; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I32-LABEL: lrint_v8f32:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    vrev64.32 q4, q1
+; BE-I32-NEXT:    vrev64.32 q5, q0
+; BE-I32-NEXT:    vmov.f32 s0, s16
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s20
+; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s18
+; BE-I32-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s22
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s19
+; BE-I32-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s23
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s21
+; BE-I32-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s17
+; BE-I32-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q7
+; BE-I32-NEXT:    vrev64.32 q1, q6
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v8f32:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    .pad #32
+; BE-I64-NEXT:    sub sp, sp, #32
+; BE-I64-NEXT:    vorr q4, q1, q1
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    vorr q5, q0, q0
+; BE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEXT:    vrev64.32 d12, d8
+; BE-I64-NEXT:    vmov.f32 s0, s25
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s24
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vrev64.32 d0, d11
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vrev64.32 d8, d9
+; BE-I64-NEXT:    vorr d9, d0, d0
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    vmov.f32 s0, s19
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    vrev64.32 d8, d16
+; BE-I64-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vldr d0, [sp, #8] @ 8-byte Reload
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    vmov.f32 s0, s1
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vldr d0, [sp, #24] @ 8-byte Reload
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    @ kill: def $s0 killed $s0 killed $d0
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    vmov.32 d9[1], r6
+; BE-I64-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEXT:    vmov.32 d15[1], r8
+; BE-I64-NEXT:    vmov.32 d13[1], r7
+; BE-I64-NEXT:    vmov.32 d8[1], r5
+; BE-I64-NEXT:    vmov.32 d10[1], r10
+; BE-I64-NEXT:    vmov.32 d14[1], r9
+; BE-I64-NEXT:    vmov.32 d12[1], r1
+; BE-I64-NEXT:    vrev64.32 q0, q4
+; BE-I64-NEXT:    vrev64.32 q1, q5
+; BE-I64-NEXT:    vrev64.32 q2, q7
+; BE-I64-NEXT:    vrev64.32 q3, q6
+; BE-I64-NEXT:    add sp, sp, #32
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v8f32:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r11, lr}
+; BE-I32-NEON-NEXT:    push {r11, lr}
+; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    vrev64.32 q4, q1
+; BE-I32-NEON-NEXT:    vrev64.32 q5, q0
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s20
+; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s18
+; BE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s22
+; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s19
+; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s23
+; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s21
+; BE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I32-NEON-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT:    vrev64.32 q0, q7
+; BE-I32-NEON-NEXT:    vrev64.32 q1, q6
+; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v8f32:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    .pad #32
+; BE-I64-NEON-NEXT:    sub sp, sp, #32
+; BE-I64-NEON-NEXT:    vorr q4, q1, q1
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    vorr q5, q0, q0
+; BE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    vrev64.32 d12, d8
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s25
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s24
+; BE-I64-NEON-NEXT:    mov r8, r1
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vrev64.32 d0, d11
+; BE-I64-NEON-NEXT:    mov r9, r1
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d9
+; BE-I64-NEON-NEXT:    vorr d9, d0, d0
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    mov r10, r1
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s19
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d16
+; BE-I64-NEON-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vldr d0, [sp, #8] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s1
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vldr d0, [sp, #24] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    @ kill: def $s0 killed $s0 killed $d0
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r6
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r8
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r7
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r5
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r10
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r9
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q0, q4
+; BE-I64-NEON-NEXT:    vrev64.32 q1, q5
+; BE-I64-NEON-NEXT:    vrev64.32 q2, q7
+; BE-I64-NEON-NEXT:    vrev64.32 q3, q6
+; BE-I64-NEON-NEXT:    add sp, sp, #32
+; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x)
+  ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>)
+
+define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
+; LE-I32-LABEL: lrint_v16f32:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    .pad #80
+; LE-I32-NEXT:    sub sp, sp, #80
+; LE-I32-NEXT:    vorr q5, q3, q3
+; LE-I32-NEXT:    vstmia sp, {d0, d1} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vorr q6, q2, q2
+; LE-I32-NEXT:    vorr q7, q1, q1
+; LE-I32-NEXT:    vmov.f32 s0, s20
+; LE-I32-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s22
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s24
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s26
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT:    vorr q4, q7, q7
+; LE-I32-NEXT:    vmov.f32 s0, s16
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s18
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vldmia sp, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT:    vmov.f32 s0, s26
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s24
+; LE-I32-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s27
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s25
+; LE-I32-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s19
+; LE-I32-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s17
+; LE-I32-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT:    vmov.f32 s0, s27
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s25
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEXT:    vmov.f32 s0, s19
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s17
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEXT:    vorr q0, q7, q7
+; LE-I32-NEXT:    vldmia lr, {d4, d5} @ 16-byte Reload
+; LE-I32-NEXT:    vorr q1, q5, q5
+; LE-I32-NEXT:    vorr q3, q6, q6
+; LE-I32-NEXT:    add sp, sp, #80
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v16f32:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    .pad #4
+; LE-I64-NEXT:    sub sp, sp, #4
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    .pad #160
+; LE-I64-NEXT:    sub sp, sp, #160
+; LE-I64-NEXT:    add lr, sp, #112
+; LE-I64-NEXT:    vorr q5, q3, q3
+; LE-I64-NEXT:    vorr q6, q0, q0
+; LE-I64-NEXT:    mov r4, r0
+; LE-I64-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #48
+; LE-I64-NEXT:    vorr q7, q1, q1
+; LE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEXT:    vmov.f32 s0, s23
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEXT:    add lr, sp, #144
+; LE-I64-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s25
+; LE-I64-NEXT:    str r1, [sp, #84] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s28
+; LE-I64-NEXT:    add lr, sp, #128
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s29
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s30
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s31
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #112
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.f32 s0, s29
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s22
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    vmov.32 d13[1], r7
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #144
+; LE-I64-NEXT:    vmov.f32 s0, s21
+; LE-I64-NEXT:    vmov.32 d12[1], r5
+; LE-I64-NEXT:    str r1, [sp, #40] @ 4-byte Spill
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d16[0], r0
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #88
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s20
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    vmov.32 d9[1], r6
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s31
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d8[1], r9
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #64
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #128
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #48
+; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.f32 s0, s27
+; LE-I64-NEXT:    vmov.32 d11[1], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s26
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; LE-I64-NEXT:    add lr, sp, #128
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d10[1], r0
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    add lr, sp, #144
+; LE-I64-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d17[1], r0
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #112
+; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.f32 s0, s20
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vmov.f32 s0, s22
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d16[0], r0
+; LE-I64-NEXT:    vmov.32 d17[1], r11
+; LE-I64-NEXT:    vorr q6, q8, q8
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #144
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #128
+; LE-I64-NEXT:    vmov.32 d9[1], r9
+; LE-I64-NEXT:    vmov.32 d12[1], r6
+; LE-I64-NEXT:    vmov.32 d19[1], r10
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vmov.32 d16[1], r0
+; LE-I64-NEXT:    add r0, r4, #64
+; LE-I64-NEXT:    vmov.32 d18[1], r8
+; LE-I64-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT:    vmov.32 d15[1], r7
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #64
+; LE-I64-NEXT:    vmov.32 d14[1], r5
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEXT:    vst1.64 {d14, d15}, [r4:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #88
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r4:128]
+; LE-I64-NEXT:    add sp, sp, #160
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    add sp, sp, #4
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v16f32:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r11, lr}
+; LE-I32-NEON-NEXT:    push {r11, lr}
+; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    .pad #80
+; LE-I32-NEON-NEXT:    sub sp, sp, #80
+; LE-I32-NEON-NEXT:    vorr q5, q3, q3
+; LE-I32-NEON-NEXT:    vstmia sp, {d0, d1} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    vorr q6, q2, q2
+; LE-I32-NEON-NEXT:    vorr q7, q1, q1
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s20
+; LE-I32-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s22
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s24
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s26
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    vorr q4, q7, q7
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s16
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s18
+; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vldmia sp, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s26
+; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s24
+; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s27
+; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s25
+; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s19
+; LE-I32-NEON-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s17
+; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s27
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s25
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s19
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s17
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT:    vorr q0, q7, q7
+; LE-I32-NEON-NEXT:    vldmia lr, {d4, d5} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr q1, q5, q5
+; LE-I32-NEON-NEXT:    vorr q3, q6, q6
+; LE-I32-NEON-NEXT:    add sp, sp, #80
+; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v16f32:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT:    .pad #4
+; LE-I64-NEON-NEXT:    sub sp, sp, #4
+; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    .pad #160
+; LE-I64-NEON-NEXT:    sub sp, sp, #160
+; LE-I64-NEON-NEXT:    add lr, sp, #112
+; LE-I64-NEON-NEXT:    vorr q5, q3, q3
+; LE-I64-NEON-NEXT:    vorr q6, q0, q0
+; LE-I64-NEON-NEXT:    mov r4, r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #48
+; LE-I64-NEON-NEXT:    vorr q7, q1, q1
+; LE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s23
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEON-NEXT:    add lr, sp, #144
+; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s25
+; LE-I64-NEON-NEXT:    str r1, [sp, #84] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s28
+; LE-I64-NEON-NEXT:    add lr, sp, #128
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s29
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s30
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s31
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #112
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s29
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s22
+; LE-I64-NEON-NEXT:    add lr, sp, #24
+; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT:    mov r11, r1
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r7
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #144
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s21
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-I64-NEON-NEXT:    str r1, [sp, #40] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #88
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s20
+; LE-I64-NEON-NEXT:    mov r10, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r6
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s31
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    mov r8, r1
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r9
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #64
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #128
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #48
+; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s27
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s26
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #128
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #144
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d17[1], r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #112
+; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s20
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #24
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s22
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d17[1], r11
+; LE-I64-NEON-NEXT:    vorr q6, q8, q8
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #144
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #128
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r9
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r6
+; LE-I64-NEON-NEXT:    vmov.32 d19[1], r10
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT:    vmov.32 d16[1], r0
+; LE-I64-NEON-NEXT:    add r0, r4, #64
+; LE-I64-NEON-NEXT:    vmov.32 d18[1], r8
+; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r7
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #64
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r5
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r4:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #88
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]
+; LE-I64-NEON-NEXT:    add sp, sp, #160
+; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    add sp, sp, #4
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v16f32:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    .pad #96
+; BE-I32-NEXT:    sub sp, sp, #96
+; BE-I32-NEXT:    vrev64.32 q3, q3
+; BE-I32-NEXT:    add lr, sp, #64
+; BE-I32-NEXT:    vrev64.32 q4, q0
+; BE-I32-NEXT:    vmov.f32 s0, s12
+; BE-I32-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #32
+; BE-I32-NEXT:    vrev64.32 q5, q1
+; BE-I32-NEXT:    vrev64.32 q7, q2
+; BE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s16
+; BE-I32-NEXT:    vmov.32 d16[0], r0
+; BE-I32-NEXT:    add lr, sp, #80
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s18
+; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s20
+; BE-I32-NEXT:    add lr, sp, #48
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s22
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s28
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    vstmia sp, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    add lr, sp, #64
+; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.f32 s0, s22
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s30
+; BE-I32-NEXT:    add lr, sp, #80
+; BE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s23
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s31
+; BE-I32-NEXT:    add lr, sp, #80
+; BE-I32-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s29
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.f32 s0, s19
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s17
+; BE-I32-NEXT:    vldmia sp, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    add lr, sp, #32
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.f32 s0, s19
+; BE-I32-NEXT:    vorr q7, q5, q5
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s17
+; BE-I32-NEXT:    add lr, sp, #48
+; BE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    add lr, sp, #64
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.f32 s0, s1
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    add lr, sp, #80
+; BE-I32-NEXT:    vrev64.32 q0, q5
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vrev64.32 q1, q7
+; BE-I32-NEXT:    vmov.32 d16[1], r0
+; BE-I32-NEXT:    vrev64.32 q2, q6
+; BE-I32-NEXT:    vrev64.32 q3, q8
+; BE-I32-NEXT:    add sp, sp, #96
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v16f32:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    .pad #4
+; BE-I64-NEXT:    sub sp, sp, #4
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    .pad #144
+; BE-I64-NEXT:    sub sp, sp, #144
+; BE-I64-NEXT:    vorr q6, q3, q3
+; BE-I64-NEXT:    add lr, sp, #112
+; BE-I64-NEXT:    vorr q7, q0, q0
+; BE-I64-NEXT:    mov r4, r0
+; BE-I64-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #96
+; BE-I64-NEXT:    vrev64.32 d8, d13
+; BE-I64-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    str r1, [sp, #88] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vrev64.32 d8, d14
+; BE-I64-NEXT:    add lr, sp, #128
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    str r1, [sp, #92] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    vrev64.32 d9, d12
+; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT:    vstr d9, [sp, #64] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s19
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    str r1, [sp, #84] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    vrev64.32 d9, d15
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s18
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s19
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vldr d0, [sp, #64] @ 8-byte Reload
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    @ kill: def $s0 killed $s0 killed $d0
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    add lr, sp, #40
+; BE-I64-NEXT:    str r1, [sp, #60] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.32 d15[1], r7
+; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #96
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    vrev64.32 d8, d16
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    vmov.32 d14[1], r5
+; BE-I64-NEXT:    add lr, sp, #64
+; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    mov r11, r1
+; BE-I64-NEXT:    vmov.32 d13[1], r6
+; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #96
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    vrev64.32 d8, d17
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    vmov.32 d12[1], r9
+; BE-I64-NEXT:    add lr, sp, #96
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #112
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #128
+; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT:    vrev64.32 d8, d16
+; BE-I64-NEXT:    vmov.32 d11[1], r0
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #92] @ 4-byte Reload
+; BE-I64-NEXT:    add lr, sp, #128
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d10[1], r0
+; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    add lr, sp, #112
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #40
+; BE-I64-NEXT:    vrev64.32 d8, d17
+; BE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    vmov.32 d13[1], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #60] @ 4-byte Reload
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d12[1], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    add r0, r4, #64
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    vmov.32 d17[1], r10
+; BE-I64-NEXT:    vmov.32 d16[1], r11
+; BE-I64-NEXT:    vorr q12, q8, q8
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #128
+; BE-I64-NEXT:    vmov.32 d15[1], r7
+; BE-I64-NEXT:    vmov.32 d11[1], r6
+; BE-I64-NEXT:    vmov.32 d14[1], r5
+; BE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #96
+; BE-I64-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEXT:    vmov.32 d17[1], r8
+; BE-I64-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #64
+; BE-I64-NEXT:    vmov.32 d16[1], r9
+; BE-I64-NEXT:    vrev64.32 q14, q7
+; BE-I64-NEXT:    vorr q13, q8, q8
+; BE-I64-NEXT:    vrev64.32 q15, q5
+; BE-I64-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I64-NEXT:    vrev64.32 q8, q6
+; BE-I64-NEXT:    vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 q9, q9
+; BE-I64-NEXT:    vrev64.32 q10, q10
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 q11, q11
+; BE-I64-NEXT:    vrev64.32 q12, q12
+; BE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; BE-I64-NEXT:    vst1.64 {d20, d21}, [r4:128]!
+; BE-I64-NEXT:    vst1.64 {d22, d23}, [r4:128]!
+; BE-I64-NEXT:    vrev64.32 q13, q13
+; BE-I64-NEXT:    vst1.64 {d24, d25}, [r4:128]!
+; BE-I64-NEXT:    vst1.64 {d26, d27}, [r4:128]
+; BE-I64-NEXT:    add sp, sp, #144
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    add sp, sp, #4
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v16f32:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r11, lr}
+; BE-I32-NEON-NEXT:    push {r11, lr}
+; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    .pad #96
+; BE-I32-NEON-NEXT:    sub sp, sp, #96
+; BE-I32-NEON-NEXT:    vrev64.32 q3, q3
+; BE-I32-NEON-NEXT:    add lr, sp, #64
+; BE-I32-NEON-NEXT:    vrev64.32 q4, q0
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s12
+; BE-I32-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #32
+; BE-I32-NEON-NEXT:    vrev64.32 q5, q1
+; BE-I32-NEON-NEXT:    vrev64.32 q7, q2
+; BE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I32-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I32-NEON-NEXT:    add lr, sp, #80
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s18
+; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s20
+; BE-I32-NEON-NEXT:    add lr, sp, #48
+; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s22
+; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s28
+; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT:    vstmia sp, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    add lr, sp, #64
+; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s22
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s30
+; BE-I32-NEON-NEXT:    add lr, sp, #80
+; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s23
+; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s31
+; BE-I32-NEON-NEXT:    add lr, sp, #80
+; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s29
+; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s19
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I32-NEON-NEXT:    vldmia sp, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    add lr, sp, #32
+; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s19
+; BE-I32-NEON-NEXT:    vorr q7, q5, q5
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I32-NEON-NEXT:    add lr, sp, #48
+; BE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    add lr, sp, #64
+; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s1
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    add lr, sp, #80
+; BE-I32-NEON-NEXT:    vrev64.32 q0, q5
+; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vrev64.32 q1, q7
+; BE-I32-NEON-NEXT:    vmov.32 d16[1], r0
+; BE-I32-NEON-NEXT:    vrev64.32 q2, q6
+; BE-I32-NEON-NEXT:    vrev64.32 q3, q8
+; BE-I32-NEON-NEXT:    add sp, sp, #96
+; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v16f32:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT:    .pad #4
+; BE-I64-NEON-NEXT:    sub sp, sp, #4
+; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    .pad #144
+; BE-I64-NEON-NEXT:    sub sp, sp, #144
+; BE-I64-NEON-NEXT:    vorr q6, q3, q3
+; BE-I64-NEON-NEXT:    add lr, sp, #112
+; BE-I64-NEON-NEXT:    vorr q7, q0, q0
+; BE-I64-NEON-NEXT:    mov r4, r0
+; BE-I64-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #96
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d13
+; BE-I64-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    str r1, [sp, #88] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d14
+; BE-I64-NEON-NEXT:    add lr, sp, #128
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    str r1, [sp, #92] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    vrev64.32 d9, d12
+; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    vstr d9, [sp, #64] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s19
+; BE-I64-NEON-NEXT:    mov r9, r1
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    str r1, [sp, #84] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    vrev64.32 d9, d15
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s18
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s19
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vldr d0, [sp, #64] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    @ kill: def $s0 killed $s0 killed $d0
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #40
+; BE-I64-NEON-NEXT:    str r1, [sp, #60] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r7
+; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #96
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d16
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r5
+; BE-I64-NEON-NEXT:    add lr, sp, #64
+; BE-I64-NEON-NEXT:    mov r10, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #24
+; BE-I64-NEON-NEXT:    mov r11, r1
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r6
+; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #96
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d17
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r9
+; BE-I64-NEON-NEXT:    add lr, sp, #96
+; BE-I64-NEON-NEXT:    mov r8, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    mov r9, r1
+; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #112
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #128
+; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d16
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #92] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #128
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    add lr, sp, #112
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #40
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d17
+; BE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #60] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    add lr, sp, #24
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    add r0, r4, #64
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    vmov.32 d17[1], r10
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r11
+; BE-I64-NEON-NEXT:    vorr q12, q8, q8
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #128
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r7
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r6
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r5
+; BE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #96
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT:    vmov.32 d17[1], r8
+; BE-I64-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #64
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r9
+; BE-I64-NEON-NEXT:    vrev64.32 q14, q7
+; BE-I64-NEON-NEXT:    vorr q13, q8, q8
+; BE-I64-NEON-NEXT:    vrev64.32 q15, q5
+; BE-I64-NEON-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q6
+; BE-I64-NEON-NEXT:    vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEON-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEON-NEXT:    vrev64.32 q9, q9
+; BE-I64-NEON-NEXT:    vrev64.32 q10, q10
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT:    vrev64.32 q11, q11
+; BE-I64-NEON-NEXT:    vrev64.32 q12, q12
+; BE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; BE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r4:128]!
+; BE-I64-NEON-NEXT:    vst1.64 {d22, d23}, [r4:128]!
+; BE-I64-NEON-NEXT:    vrev64.32 q13, q13
+; BE-I64-NEON-NEXT:    vst1.64 {d24, d25}, [r4:128]!
+; BE-I64-NEON-NEXT:    vst1.64 {d26, d27}, [r4:128]
+; BE-I64-NEON-NEXT:    add sp, sp, #144
+; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    add sp, sp, #4
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
+  ret <16 x iXLen> %a
+}
+declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
+
+define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
+; LE-I32-LABEL: lrint_v32f32:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, r5, r6, lr}
+; LE-I32-NEXT:    push {r4, r5, r6, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    .pad #144
+; LE-I32-NEXT:    sub sp, sp, #144
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    add r0, sp, #224
+; LE-I32-NEXT:    vorr q4, q0, q0
+; LE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    vorr q6, q3, q3
+; LE-I32-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I32-NEXT:    vmov.f32 s0, s4
+; LE-I32-NEXT:    add lr, sp, #80
+; LE-I32-NEXT:    vorr q5, q1, q1
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    add r0, sp, #272
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    add r0, sp, #240
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    vstmia sp, {d2, d3} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s18
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s22
+; LE-I32-NEXT:    add lr, sp, #112
+; LE-I32-NEXT:    vmov.32 d17[0], r0
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    add lr, sp, #128
+; LE-I32-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEXT:    vmov.f32 s0, s20
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s22
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    vorr q7, q5, q5
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s26
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s24
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s27
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s25
+; LE-I32-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s31
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    add lr, sp, #96
+; LE-I32-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s29
+; LE-I32-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vldmia sp, {d14, d15} @ 16-byte Reload
+; LE-I32-NEXT:    vmov.f32 s0, s31
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    add lr, sp, #128
+; LE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEXT:    vmov.f32 s0, s23
+; LE-I32-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s29
+; LE-I32-NEXT:    add lr, sp, #112
+; LE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s20
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    add lr, sp, #128
+; LE-I32-NEXT:    add r0, sp, #256
+; LE-I32-NEXT:    vld1.64 {d14, d15}, [r0]
+; LE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s21
+; LE-I32-NEXT:    vorr q4, q6, q6
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vorr q6, q7, q7
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    add lr, sp, #112
+; LE-I32-NEXT:    vstmia sp, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT:    vmov.f32 s0, s24
+; LE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEXT:    vmov.f32 s0, s18
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s16
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s19
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s26
+; LE-I32-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s17
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEXT:    vmov.f32 s0, s20
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    add lr, sp, #80
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT:    vmov.f32 s0, s26
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s24
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s27
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s22
+; LE-I32-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s25
+; LE-I32-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s23
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s21
+; LE-I32-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vldmia sp, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT:    vmov.f32 s0, s27
+; LE-I32-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.f32 s0, s25
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    add lr, sp, #112
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    mov r0, r4
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    add lr, sp, #128
+; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    add lr, sp, #96
+; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I32-NEXT:    add r0, r4, #64
+; LE-I32-NEXT:    vst1.32 {d8, d9}, [r0:128]!
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT:    vst1.32 {d10, d11}, [r0:128]!
+; LE-I32-NEXT:    vst1.64 {d14, d15}, [r0:128]
+; LE-I32-NEXT:    add sp, sp, #144
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    pop {r4, r5, r6, pc}
+;
+; LE-I64-LABEL: lrint_v32f32:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    .pad #4
+; LE-I64-NEXT:    sub sp, sp, #4
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    .pad #184
+; LE-I64-NEXT:    sub sp, sp, #184
+; LE-I64-NEXT:    add lr, sp, #152
+; LE-I64-NEXT:    vorr q7, q3, q3
+; LE-I64-NEXT:    vorr q4, q2, q2
+; LE-I64-NEXT:    mov r5, r0
+; LE-I64-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #88
+; LE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEXT:    vmov.f32 s0, s3
+; LE-I64-NEXT:    str r0, [sp, #68] @ 4-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s18
+; LE-I64-NEXT:    add lr, sp, #168
+; LE-I64-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s16
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s17
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s19
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s31
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s30
+; LE-I64-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    vmov.32 d11[1], r7
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s29
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    add lr, sp, #104
+; LE-I64-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.32 d13[1], r4
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    add r0, sp, #320
+; LE-I64-NEXT:    add lr, sp, #120
+; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-I64-NEXT:    add r0, sp, #304
+; LE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #72
+; LE-I64-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-I64-NEXT:    add r0, sp, #336
+; LE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #32
+; LE-I64-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-I64-NEXT:    add r0, sp, #288
+; LE-I64-NEXT:    vmov.32 d12[1], r6
+; LE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #48
+; LE-I64-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-I64-NEXT:    vmov.32 d10[1], r8
+; LE-I64-NEXT:    add r8, r5, #64
+; LE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #152
+; LE-I64-NEXT:    vst1.64 {d12, d13}, [r8:128]!
+; LE-I64-NEXT:    vst1.64 {d10, d11}, [r8:128]!
+; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.f32 s0, s27
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s28
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s26
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vmov.32 d11[1], r4
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    add lr, sp, #136
+; LE-I64-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #168
+; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #88
+; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.f32 s0, s26
+; LE-I64-NEXT:    vmov.32 d11[1], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s25
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    add lr, sp, #168
+; LE-I64-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #104
+; LE-I64-NEXT:    vorr q5, q6, q6
+; LE-I64-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d15[1], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s20
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; LE-I64-NEXT:    add lr, sp, #104
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d14[1], r0
+; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    add lr, sp, #152
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vorr q7, q6, q6
+; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d9[1], r11
+; LE-I64-NEXT:    vmov.f32 s0, s25
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    vmov.32 d8[1], r9
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #136
+; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d16[1], r10
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #120
+; LE-I64-NEXT:    vst1.64 {d8, d9}, [r8:128]!
+; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.f32 s0, s1
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #152
+; LE-I64-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #104
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #72
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r8:128]
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.f32 s0, s19
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #168
+; LE-I64-NEXT:    vmov.f32 s0, s18
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d16[1], r7
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s17
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    add lr, sp, #104
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d15[1], r4
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s16
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    vmov.32 d14[1], r6
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    add lr, sp, #88
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d11[1], r5
+; LE-I64-NEXT:    vmov.32 d10[1], r11
+; LE-I64-NEXT:    ldr r11, [sp, #68] @ 4-byte Reload
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #16
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #32
+; LE-I64-NEXT:    vst1.64 {d14, d15}, [r11:128]!
+; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.f32 s0, s23
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #152
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #120
+; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT:    @ kill: def $s0 killed $s0 killed $q0
+; LE-I64-NEXT:    vmov.32 d13[1], r10
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s22
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    add lr, sp, #152
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #104
+; LE-I64-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d15[1], r8
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s21
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    add lr, sp, #72
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d14[1], r7
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #104
+; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s20
+; LE-I64-NEXT:    add lr, sp, #88
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d13[1], r9
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    add lr, sp, #32
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d12[1], r6
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #88
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #120
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.f32 s0, s19
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s18
+; LE-I64-NEXT:    add lr, sp, #72
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d13[1], r4
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #152
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d16[1], r5
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #168
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #48
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.f32 s0, s21
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s20
+; LE-I64-NEXT:    vmov.32 d12[1], r8
+; LE-I64-NEXT:    add lr, sp, #72
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s23
+; LE-I64-NEXT:    add lr, sp, #32
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d13[1], r7
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #48
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.f32 s0, s2
+; LE-I64-NEXT:    vmov.32 d12[1], r9
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #16
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #136
+; LE-I64-NEXT:    vmov.32 d11[1], r7
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #152
+; LE-I64-NEXT:    vmov.32 d15[1], r10
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; LE-I64-NEXT:    vmov.32 d10[1], r1
+; LE-I64-NEXT:    ldr r1, [sp, #68] @ 4-byte Reload
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add r0, r1, #192
+; LE-I64-NEXT:    add lr, sp, #72
+; LE-I64-NEXT:    vmov.32 d14[1], r4
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEXT:    vmov.32 d9[1], r5
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #88
+; LE-I64-NEXT:    vmov.32 d8[1], r6
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT:    add r0, r1, #128
+; LE-I64-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #104
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT:    add sp, sp, #184
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    add sp, sp, #4
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v32f32:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r4, r5, r6, lr}
+; LE-I32-NEON-NEXT:    push {r4, r5, r6, lr}
+; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    .pad #144
+; LE-I32-NEON-NEXT:    sub sp, sp, #144
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    mov r4, r0
+; LE-I32-NEON-NEXT:    add r0, sp, #224
+; LE-I32-NEON-NEXT:    vorr q4, q0, q0
+; LE-I32-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    vorr q6, q3, q3
+; LE-I32-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s4
+; LE-I32-NEON-NEXT:    add lr, sp, #80
+; LE-I32-NEON-NEXT:    vorr q5, q1, q1
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    add r0, sp, #272
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    add r0, sp, #240
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    vstmia sp, {d2, d3} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s18
+; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s22
+; LE-I32-NEON-NEXT:    add lr, sp, #112
+; LE-I32-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    add lr, sp, #128
+; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s20
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s22
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    vorr q7, q5, q5
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s26
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s24
+; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s27
+; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s25
+; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s31
+; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT:    add lr, sp, #96
+; LE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s29
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vldmia sp, {d14, d15} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s31
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    add lr, sp, #128
+; LE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s23
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s29
+; LE-I32-NEON-NEXT:    add lr, sp, #112
+; LE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s20
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT:    add lr, sp, #128
+; LE-I32-NEON-NEXT:    add r0, sp, #256
+; LE-I32-NEON-NEXT:    vld1.64 {d14, d15}, [r0]
+; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s21
+; LE-I32-NEON-NEXT:    vorr q4, q6, q6
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vorr q6, q7, q7
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT:    add lr, sp, #112
+; LE-I32-NEON-NEXT:    vstmia sp, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s24
+; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s18
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s16
+; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s19
+; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s26
+; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s17
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s20
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    add lr, sp, #80
+; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s26
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s24
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s27
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s22
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s25
+; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s23
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s21
+; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vldmia sp, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s27
+; LE-I32-NEON-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    vmov.f32 s0, s25
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT:    bl lrintf
+; LE-I32-NEON-NEXT:    add lr, sp, #112
+; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT:    mov r0, r4
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    add lr, sp, #128
+; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    add lr, sp, #96
+; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I32-NEON-NEXT:    add r0, r4, #64
+; LE-I32-NEON-NEXT:    vst1.32 {d8, d9}, [r0:128]!
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT:    vst1.32 {d10, d11}, [r0:128]!
+; LE-I32-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]
+; LE-I32-NEON-NEXT:    add sp, sp, #144
+; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    pop {r4, r5, r6, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v32f32:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT:    .pad #4
+; LE-I64-NEON-NEXT:    sub sp, sp, #4
+; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    .pad #184
+; LE-I64-NEON-NEXT:    sub sp, sp, #184
+; LE-I64-NEON-NEXT:    add lr, sp, #152
+; LE-I64-NEON-NEXT:    vorr q7, q3, q3
+; LE-I64-NEON-NEXT:    vorr q4, q2, q2
+; LE-I64-NEON-NEXT:    mov r5, r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #88
+; LE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s3
+; LE-I64-NEON-NEXT:    str r0, [sp, #68] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s18
+; LE-I64-NEON-NEXT:    add lr, sp, #168
+; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; LE-I64-NEON-NEXT:    mov r8, r1
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s19
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s31
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s30
+; LE-I64-NEON-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r7
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s29
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #104
+; LE-I64-NEON-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r4
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    add r0, sp, #320
+; LE-I64-NEON-NEXT:    add lr, sp, #120
+; LE-I64-NEON-NEXT:    mov r11, r1
+; LE-I64-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-I64-NEON-NEXT:    add r0, sp, #304
+; LE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #72
+; LE-I64-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-I64-NEON-NEXT:    add r0, sp, #336
+; LE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #32
+; LE-I64-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-I64-NEON-NEXT:    add r0, sp, #288
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r6
+; LE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #48
+; LE-I64-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r8
+; LE-I64-NEON-NEXT:    add r8, r5, #64
+; LE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #152
+; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r8:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d10, d11}, [r8:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s27
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s28
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s26
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #136
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    mov r10, r1
+; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #168
+; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #88
+; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s26
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s25
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #168
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #104
+; LE-I64-NEON-NEXT:    vorr q5, q6, q6
+; LE-I64-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r0
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s20
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #104
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #152
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vorr q7, q6, q6
+; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r11
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s25
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r9
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #136
+; LE-I64-NEON-NEXT:    mov r11, r1
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d16[1], r10
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #120
+; LE-I64-NEON-NEXT:    vst1.64 {d8, d9}, [r8:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s1
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #152
+; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT:    mov r10, r1
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #104
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #72
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s19
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #168
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s18
+; LE-I64-NEON-NEXT:    mov r8, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d16[1], r7
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #104
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r4
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r6
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #88
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r5
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r11
+; LE-I64-NEON-NEXT:    ldr r11, [sp, #68] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #16
+; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #32
+; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r11:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s23
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #152
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #120
+; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    @ kill: def $s0 killed $s0 killed $q0
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r10
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s22
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #152
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #104
+; LE-I64-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r8
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s21
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #72
+; LE-I64-NEON-NEXT:    mov r8, r1
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r7
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #104
+; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s20
+; LE-I64-NEON-NEXT:    add lr, sp, #88
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r9
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #32
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r6
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #88
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #120
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s19
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s18
+; LE-I64-NEON-NEXT:    add lr, sp, #72
+; LE-I64-NEON-NEXT:    mov r10, r1
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r4
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #152
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d16[1], r5
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #168
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #48
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s21
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s20
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r8
+; LE-I64-NEON-NEXT:    add lr, sp, #72
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s23
+; LE-I64-NEON-NEXT:    add lr, sp, #32
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r7
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #48
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.f32 s0, s2
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r9
+; LE-I64-NEON-NEXT:    bl lrintf
+; LE-I64-NEON-NEXT:    add lr, sp, #16
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #136
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r7
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #152
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r10
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r1
+; LE-I64-NEON-NEXT:    ldr r1, [sp, #68] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add r0, r1, #192
+; LE-I64-NEON-NEXT:    add lr, sp, #72
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r4
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r5
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #88
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r6
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEON-NEXT:    add r0, r1, #128
+; LE-I64-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #104
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEON-NEXT:    add sp, sp, #184
+; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    add sp, sp, #4
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v32f32:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, r5, r6, lr}
+; BE-I32-NEXT:    push {r4, r5, r6, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    .pad #144
+; BE-I32-NEXT:    sub sp, sp, #144
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    add r0, sp, #256
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    add r0, sp, #272
+; BE-I32-NEXT:    vrev64.32 q4, q3
+; BE-I32-NEXT:    vrev64.32 q7, q1
+; BE-I32-NEXT:    vrev64.32 q8, q8
+; BE-I32-NEXT:    vld1.64 {d18, d19}, [r0]
+; BE-I32-NEXT:    add r0, sp, #224
+; BE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #80
+; BE-I32-NEXT:    vrev64.32 q5, q0
+; BE-I32-NEXT:    vmov.f32 s0, s28
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #64
+; BE-I32-NEXT:    vrev64.32 q8, q9
+; BE-I32-NEXT:    vld1.64 {d20, d21}, [r0]
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #32
+; BE-I32-NEXT:    vrev64.32 q8, q10
+; BE-I32-NEXT:    vrev64.32 q6, q2
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    vstmia sp, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s18
+; BE-I32-NEXT:    vmov.32 d16[0], r0
+; BE-I32-NEXT:    add lr, sp, #128
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s20
+; BE-I32-NEXT:    add lr, sp, #112
+; BE-I32-NEXT:    vmov.32 d17[0], r0
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #48
+; BE-I32-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s22
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s30
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s26
+; BE-I32-NEXT:    add lr, sp, #128
+; BE-I32-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s24
+; BE-I32-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s27
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s25
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vldmia sp, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.f32 s0, s27
+; BE-I32-NEXT:    add lr, sp, #96
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    add lr, sp, #48
+; BE-I32-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.f32 s0, s23
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s21
+; BE-I32-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    add lr, sp, #48
+; BE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.f32 s0, s23
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s25
+; BE-I32-NEXT:    add lr, sp, #112
+; BE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s20
+; BE-I32-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEXT:    add r0, sp, #240
+; BE-I32-NEXT:    add lr, sp, #128
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT:    vrev64.32 q6, q8
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s21
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s24
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    add lr, sp, #112
+; BE-I32-NEXT:    vorr q7, q6, q6
+; BE-I32-NEXT:    vstmia sp, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    add lr, sp, #32
+; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.f32 s0, s18
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s16
+; BE-I32-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s19
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s30
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s17
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    add lr, sp, #32
+; BE-I32-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #64
+; BE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.f32 s0, s20
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    add lr, sp, #80
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.f32 s0, s26
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s24
+; BE-I32-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s27
+; BE-I32-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s22
+; BE-I32-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s25
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s23
+; BE-I32-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.f32 s0, s21
+; BE-I32-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vldmia sp, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.f32 s0, s27
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    vmov.f32 s0, s25
+; BE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT:    add lr, sp, #112
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    vrev64.32 q8, q8
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    add lr, sp, #48
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    mov r0, r4
+; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT:    add lr, sp, #128
+; BE-I32-NEXT:    vrev64.32 q8, q4
+; BE-I32-NEXT:    vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT:    add lr, sp, #96
+; BE-I32-NEXT:    vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT:    add lr, sp, #112
+; BE-I32-NEXT:    vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT:    add lr, sp, #32
+; BE-I32-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; BE-I32-NEXT:    add r0, r4, #64
+; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT:    vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEXT:    vst1.32 {d10, d11}, [r0:128]!
+; BE-I32-NEXT:    vst1.32 {d14, d15}, [r0:128]!
+; BE-I32-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-I32-NEXT:    add sp, sp, #144
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-I64-LABEL: lrint_v32f32:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    .pad #4
+; BE-I64-NEXT:    sub sp, sp, #4
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    .pad #256
+; BE-I64-NEXT:    sub sp, sp, #256
+; BE-I64-NEXT:    add lr, sp, #208
+; BE-I64-NEXT:    str r0, [sp, #156] @ 4-byte Spill
+; BE-I64-NEXT:    add r0, sp, #408
+; BE-I64-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #120
+; BE-I64-NEXT:    vld1.64 {d10, d11}, [r0]
+; BE-I64-NEXT:    add r0, sp, #392
+; BE-I64-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #160
+; BE-I64-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #176
+; BE-I64-NEXT:    vrev64.32 d8, d10
+; BE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #136
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    vld1.64 {d12, d13}, [r0]
+; BE-I64-NEXT:    add r0, sp, #360
+; BE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #192
+; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT:    add r0, sp, #376
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #40
+; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    str r1, [sp, #88] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vrev64.32 d9, d11
+; BE-I64-NEXT:    add lr, sp, #240
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    str r1, [sp, #104] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.f32 s0, s18
+; BE-I64-NEXT:    vrev64.32 d8, d13
+; BE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s19
+; BE-I64-NEXT:    add lr, sp, #192
+; BE-I64-NEXT:    str r1, [sp, #72] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    vrev64.32 d10, d16
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s20
+; BE-I64-NEXT:    add lr, sp, #224
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s21
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vmov.32 d15[1], r6
+; BE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #192
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    vrev64.32 d8, d17
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    vmov.32 d14[1], r7
+; BE-I64-NEXT:    add lr, sp, #56
+; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    add lr, sp, #192
+; BE-I64-NEXT:    mov r11, r1
+; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #40
+; BE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #224
+; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT:    vrev64.32 d8, d12
+; BE-I64-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    vmov.32 d10[1], r5
+; BE-I64-NEXT:    add lr, sp, #224
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vrev64.32 d8, d13
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #240
+; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT:    vmov.32 d11[1], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
+; BE-I64-NEXT:    add lr, sp, #240
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d10[1], r0
+; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    add lr, sp, #136
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEXT:    vrev64.32 d8, d16
+; BE-I64-NEXT:    vmov.32 d13[1], r0
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    vmov.32 d12[1], r9
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    add lr, sp, #192
+; BE-I64-NEXT:    vmov.32 d15[1], r4
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    vmov.32 d17[1], r10
+; BE-I64-NEXT:    vmov.32 d16[1], r11
+; BE-I64-NEXT:    vorr q9, q8, q8
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #192
+; BE-I64-NEXT:    vmov.32 d17[1], r8
+; BE-I64-NEXT:    vmov.32 d16[1], r5
+; BE-I64-NEXT:    vorr q10, q8, q8
+; BE-I64-NEXT:    vrev64.32 q8, q6
+; BE-I64-NEXT:    vmov.32 d14[1], r6
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #240
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    vrev64.32 q8, q8
+; BE-I64-NEXT:    vmov.32 d11[1], r7
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #224
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEXT:    vrev64.32 q8, q8
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #56
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #136
+; BE-I64-NEXT:    vrev64.32 q8, q8
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #104
+; BE-I64-NEXT:    vrev64.32 q8, q9
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #88
+; BE-I64-NEXT:    vrev64.32 q8, q10
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #72
+; BE-I64-NEXT:    vrev64.32 q8, q7
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #208
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #56
+; BE-I64-NEXT:    vrev64.32 d8, d17
+; BE-I64-NEXT:    vrev64.32 q8, q5
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    add lr, sp, #120
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT:    vmov.32 d13[1], r4
+; BE-I64-NEXT:    vrev64.32 d8, d10
+; BE-I64-NEXT:    vmov.32 d12[1], r1
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    vrev64.32 q6, q6
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    vmov.32 d15[1], r1
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    ldr r6, [sp, #156] @ 4-byte Reload
+; BE-I64-NEXT:    vrev64.32 d8, d11
+; BE-I64-NEXT:    add r5, r6, #64
+; BE-I64-NEXT:    vmov.32 d14[1], r1
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    vrev64.32 q8, q7
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    vmov.32 d15[1], r1
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    add lr, sp, #208
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I64-NEXT:    vmov.32 d14[1], r1
+; BE-I64-NEXT:    vrev64.32 d8, d18
+; BE-I64-NEXT:    vrev64.32 q8, q7
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    add lr, sp, #160
+; BE-I64-NEXT:    vmov.32 d15[1], r4
+; BE-I64-NEXT:    vmov.32 d14[1], r1
+; BE-I64-NEXT:    vrev64.32 q8, q7
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT:    vrev64.32 d8, d11
+; BE-I64-NEXT:    vst1.64 {d12, d13}, [r5:128]
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    add lr, sp, #208
+; BE-I64-NEXT:    vmov.32 d13[1], r4
+; BE-I64-NEXT:    vmov.32 d12[1], r1
+; BE-I64-NEXT:    vrev64.32 q8, q6
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #176
+; BE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEXT:    vrev64.32 d8, d12
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    vmov.32 d15[1], r1
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    mov r5, r6
+; BE-I64-NEXT:    vrev64.32 d8, d13
+; BE-I64-NEXT:    vmov.32 d14[1], r1
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    vrev64.32 q8, q7
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    vmov.32 d15[1], r1
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    vrev64.32 d8, d10
+; BE-I64-NEXT:    vmov.32 d14[1], r1
+; BE-I64-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEXT:    vrev64.32 q8, q7
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    add lr, sp, #208
+; BE-I64-NEXT:    add r0, r6, #192
+; BE-I64-NEXT:    vmov.32 d15[1], r4
+; BE-I64-NEXT:    vmov.32 d14[1], r1
+; BE-I64-NEXT:    vrev64.32 q8, q7
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #56
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #192
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #240
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #224
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #136
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-I64-NEXT:    add r0, r6, #128
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #104
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #88
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #72
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-I64-NEXT:    add sp, sp, #256
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    add sp, sp, #4
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v32f32:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r4, r5, r6, lr}
+; BE-I32-NEON-NEXT:    push {r4, r5, r6, lr}
+; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    .pad #144
+; BE-I32-NEON-NEXT:    sub sp, sp, #144
+; BE-I32-NEON-NEXT:    mov r4, r0
+; BE-I32-NEON-NEXT:    add r0, sp, #256
+; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    add r0, sp, #272
+; BE-I32-NEON-NEXT:    vrev64.32 q4, q3
+; BE-I32-NEON-NEXT:    vrev64.32 q7, q1
+; BE-I32-NEON-NEXT:    vrev64.32 q8, q8
+; BE-I32-NEON-NEXT:    vld1.64 {d18, d19}, [r0]
+; BE-I32-NEON-NEXT:    add r0, sp, #224
+; BE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #80
+; BE-I32-NEON-NEXT:    vrev64.32 q5, q0
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s28
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #64
+; BE-I32-NEON-NEXT:    vrev64.32 q8, q9
+; BE-I32-NEON-NEXT:    vld1.64 {d20, d21}, [r0]
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #32
+; BE-I32-NEON-NEXT:    vrev64.32 q8, q10
+; BE-I32-NEON-NEXT:    vrev64.32 q6, q2
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    vstmia sp, {d14, d15} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s18
+; BE-I32-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I32-NEON-NEXT:    add lr, sp, #128
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s20
+; BE-I32-NEON-NEXT:    add lr, sp, #112
+; BE-I32-NEON-NEXT:    vmov.32 d17[0], r0
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #48
+; BE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s22
+; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s30
+; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s26
+; BE-I32-NEON-NEXT:    add lr, sp, #128
+; BE-I32-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s24
+; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s27
+; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s25
+; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vldmia sp, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s27
+; BE-I32-NEON-NEXT:    add lr, sp, #96
+; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    add lr, sp, #48
+; BE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s23
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s21
+; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT:    add lr, sp, #48
+; BE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s23
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s25
+; BE-I32-NEON-NEXT:    add lr, sp, #112
+; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s20
+; BE-I32-NEON-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT:    add r0, sp, #240
+; BE-I32-NEON-NEXT:    add lr, sp, #128
+; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    vrev64.32 q6, q8
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s21
+; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s24
+; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT:    add lr, sp, #112
+; BE-I32-NEON-NEXT:    vorr q7, q6, q6
+; BE-I32-NEON-NEXT:    vstmia sp, {d12, d13} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    add lr, sp, #32
+; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s18
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s19
+; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s30
+; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT:    add lr, sp, #32
+; BE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #64
+; BE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s20
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    add lr, sp, #80
+; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s26
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s24
+; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s27
+; BE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s22
+; BE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s25
+; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s23
+; BE-I32-NEON-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s21
+; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    vldmia sp, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s27
+; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    vmov.f32 s0, s25
+; BE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    add lr, sp, #112
+; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT:    vrev64.32 q8, q8
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintf
+; BE-I32-NEON-NEXT:    add lr, sp, #48
+; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT:    mov r0, r4
+; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    add lr, sp, #128
+; BE-I32-NEON-NEXT:    vrev64.32 q8, q4
+; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    add lr, sp, #96
+; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    add lr, sp, #112
+; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    add lr, sp, #32
+; BE-I32-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; BE-I32-NEON-NEXT:    add r0, r4, #64
+; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEON-NEXT:    vst1.32 {d10, d11}, [r0:128]!
+; BE-I32-NEON-NEXT:    vst1.32 {d14, d15}, [r0:128]!
+; BE-I32-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-I32-NEON-NEXT:    add sp, sp, #144
+; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v32f32:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT:    .pad #4
+; BE-I64-NEON-NEXT:    sub sp, sp, #4
+; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    .pad #256
+; BE-I64-NEON-NEXT:    sub sp, sp, #256
+; BE-I64-NEON-NEXT:    add lr, sp, #208
+; BE-I64-NEON-NEXT:    str r0, [sp, #156] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    add r0, sp, #408
+; BE-I64-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #120
+; BE-I64-NEON-NEXT:    vld1.64 {d10, d11}, [r0]
+; BE-I64-NEON-NEXT:    add r0, sp, #392
+; BE-I64-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #160
+; BE-I64-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #176
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d10
+; BE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #136
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    vld1.64 {d12, d13}, [r0]
+; BE-I64-NEON-NEXT:    add r0, sp, #360
+; BE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #192
+; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT:    add r0, sp, #376
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #40
+; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    str r1, [sp, #88] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vrev64.32 d9, d11
+; BE-I64-NEON-NEXT:    add lr, sp, #240
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    str r1, [sp, #104] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s18
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d13
+; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s19
+; BE-I64-NEON-NEXT:    add lr, sp, #192
+; BE-I64-NEON-NEXT:    str r1, [sp, #72] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d10, d16
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s20
+; BE-I64-NEON-NEXT:    add lr, sp, #224
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s21
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #24
+; BE-I64-NEON-NEXT:    mov r9, r1
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r6
+; BE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #192
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d17
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r7
+; BE-I64-NEON-NEXT:    add lr, sp, #56
+; BE-I64-NEON-NEXT:    mov r10, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #192
+; BE-I64-NEON-NEXT:    mov r11, r1
+; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #40
+; BE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #224
+; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d12
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r5
+; BE-I64-NEON-NEXT:    add lr, sp, #224
+; BE-I64-NEON-NEXT:    mov r8, r1
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d13
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #240
+; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #240
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    add lr, sp, #136
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #24
+; BE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d16
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r9
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    add lr, sp, #192
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r4
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    vmov.32 d17[1], r10
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r11
+; BE-I64-NEON-NEXT:    vorr q9, q8, q8
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #192
+; BE-I64-NEON-NEXT:    vmov.32 d17[1], r8
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r5
+; BE-I64-NEON-NEXT:    vorr q10, q8, q8
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q6
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r6
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #240
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q8
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r7
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #224
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q8
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #56
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #136
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q8
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #104
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q9
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #88
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q10
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #72
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q7
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #208
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #56
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d17
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q5
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    add lr, sp, #120
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r4
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d10
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r1
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    vrev64.32 q6, q6
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r1
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    ldr r6, [sp, #156] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d11
+; BE-I64-NEON-NEXT:    add r5, r6, #64
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r1
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q7
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r1
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    add lr, sp, #208
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d18
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q7
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #160
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q7
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d11
+; BE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r5:128]
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #208
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q6
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #176
+; BE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d12
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r1
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    mov r5, r6
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d13
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r1
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q7
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r1
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    vrev64.32 d8, d10
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r1
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q7
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    bl lrintf
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #208
+; BE-I64-NEON-NEXT:    add r0, r6, #192
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q7
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #56
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #192
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #240
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #224
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #136
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-I64-NEON-NEXT:    add r0, r6, #128
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #104
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #88
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #72
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-I64-NEON-NEXT:    add sp, sp, #256
+; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    add sp, sp, #4
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float> %x)
+  ret <32 x iXLen> %a
+}
+declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float>)
+
+define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
+; LE-I32-LABEL: lrint_v1f64:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v1f64:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r11, lr}
+; LE-I64-NEXT:    push {r11, lr}
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d0[0], r0
+; LE-I64-NEXT:    vmov.32 d0[1], r1
+; LE-I64-NEXT:    pop {r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v1f64:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r11, lr}
+; LE-I32-NEON-NEXT:    push {r11, lr}
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v1f64:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r11, lr}
+; LE-I64-NEON-NEXT:    push {r11, lr}
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d0[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d0[1], r1
+; LE-I64-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I32-LABEL: lrint_v1f64:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v1f64:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r11, lr}
+; BE-I64-NEXT:    push {r11, lr}
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEXT:    pop {r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v1f64:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r11, lr}
+; BE-I32-NEON-NEXT:    push {r11, lr}
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v1f64:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r11, lr}
+; BE-I64-NEON-NEXT:    push {r11, lr}
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEON-NEXT:    pop {r11, pc}
+  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
+  ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
+
+define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
+; LE-I32-LABEL: lrint_v2f64:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10}
+; LE-I32-NEXT:    vpush {d8, d9, d10}
+; LE-I32-NEXT:    vorr q4, q0, q0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d9, d9
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    vorr d0, d10, d10
+; LE-I32-NEXT:    vpop {d8, d9, d10}
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v2f64:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, lr}
+; LE-I64-NEXT:    push {r4, lr}
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11}
+; LE-I64-NEXT:    vorr q4, q0, q0
+; LE-I64-NEXT:    vorr d0, d9, d9
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d8, d8
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    vmov.32 d11[1], r4
+; LE-I64-NEXT:    vmov.32 d10[1], r1
+; LE-I64-NEXT:    vorr q0, q5, q5
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11}
+; LE-I64-NEXT:    pop {r4, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v2f64:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r11, lr}
+; LE-I32-NEON-NEXT:    push {r11, lr}
+; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10}
+; LE-I32-NEON-NEXT:    vpush {d8, d9, d10}
+; LE-I32-NEON-NEXT:    vorr q4, q0, q0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d9, d9
+; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT:    vorr d0, d10, d10
+; LE-I32-NEON-NEXT:    vpop {d8, d9, d10}
+; LE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v2f64:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, lr}
+; LE-I64-NEON-NEXT:    push {r4, lr}
+; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11}
+; LE-I64-NEON-NEXT:    vorr q4, q0, q0
+; LE-I64-NEON-NEXT:    vorr d0, d9, d9
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d8, d8
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r1
+; LE-I64-NEON-NEXT:    vorr q0, q5, q5
+; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11}
+; LE-I64-NEON-NEXT:    pop {r4, pc}
+;
+; BE-I32-LABEL: lrint_v2f64:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10}
+; BE-I32-NEXT:    vpush {d8, d9, d10}
+; BE-I32-NEXT:    vorr q4, q0, q0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d9, d9
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vrev64.32 d0, d10
+; BE-I32-NEXT:    vpop {d8, d9, d10}
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v2f64:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, lr}
+; BE-I64-NEXT:    push {r4, lr}
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11}
+; BE-I64-NEXT:    vorr q4, q0, q0
+; BE-I64-NEXT:    vorr d0, d9, d9
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d8, d8
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEXT:    vrev64.32 q0, q5
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11}
+; BE-I64-NEXT:    pop {r4, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v2f64:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r11, lr}
+; BE-I32-NEON-NEXT:    push {r11, lr}
+; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10}
+; BE-I32-NEON-NEXT:    vpush {d8, d9, d10}
+; BE-I32-NEON-NEXT:    vorr q4, q0, q0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d9, d9
+; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT:    vrev64.32 d0, d10
+; BE-I32-NEON-NEXT:    vpop {d8, d9, d10}
+; BE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v2f64:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, lr}
+; BE-I64-NEON-NEXT:    push {r4, lr}
+; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11}
+; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11}
+; BE-I64-NEON-NEXT:    vorr q4, q0, q0
+; BE-I64-NEON-NEXT:    vorr d0, d9, d9
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d8, d8
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q0, q5
+; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11}
+; BE-I64-NEON-NEXT:    pop {r4, pc}
+  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x)
+  ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
+
+define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
+; LE-I32-LABEL: lrint_v4f64:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; LE-I32-NEXT:    vorr q4, q1, q1
+; LE-I32-NEXT:    vorr q5, q0, q0
+; LE-I32-NEXT:    vorr d0, d8, d8
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d10, d10
+; LE-I32-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d9, d9
+; LE-I32-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d11, d11
+; LE-I32-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEXT:    vorr q0, q6, q6
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v4f64:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, lr}
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vorr q5, q1, q1
+; LE-I64-NEXT:    vorr q6, q0, q0
+; LE-I64-NEXT:    vorr d0, d11, d11
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d12, d12
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d13, d13
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d10, d10
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vmov.32 d15[1], r6
+; LE-I64-NEXT:    vmov.32 d9[1], r4
+; LE-I64-NEXT:    vmov.32 d14[1], r5
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vorr q0, q7, q7
+; LE-I64-NEXT:    vorr q1, q4, q4
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    pop {r4, r5, r6, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v4f64:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r11, lr}
+; LE-I32-NEON-NEXT:    push {r11, lr}
+; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; LE-I32-NEON-NEXT:    vorr q4, q1, q1
+; LE-I32-NEON-NEXT:    vorr q5, q0, q0
+; LE-I32-NEON-NEXT:    vorr d0, d8, d8
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d10, d10
+; LE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d9, d9
+; LE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d11, d11
+; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT:    vorr q0, q6, q6
+; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; LE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v4f64:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, lr}
+; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    vorr q5, q1, q1
+; LE-I64-NEON-NEXT:    vorr q6, q0, q0
+; LE-I64-NEON-NEXT:    vorr d0, d11, d11
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d12, d12
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d13, d13
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d10, d10
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r6
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r4
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r5
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT:    vorr q0, q7, q7
+; LE-I64-NEON-NEXT:    vorr q1, q4, q4
+; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-I32-LABEL: lrint_v4f64:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; BE-I32-NEXT:    vorr q4, q1, q1
+; BE-I32-NEXT:    vorr q5, q0, q0
+; BE-I32-NEXT:    vorr d0, d8, d8
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d10, d10
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d9, d9
+; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d11, d11
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q6
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v4f64:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, lr}
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    vorr q4, q1, q1
+; BE-I64-NEXT:    vorr q5, q0, q0
+; BE-I64-NEXT:    vorr d0, d9, d9
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d10, d10
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d11, d11
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d8, d8
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    vmov.32 d15[1], r6
+; BE-I64-NEXT:    vmov.32 d13[1], r4
+; BE-I64-NEXT:    vmov.32 d14[1], r5
+; BE-I64-NEXT:    vmov.32 d12[1], r1
+; BE-I64-NEXT:    vrev64.32 q0, q7
+; BE-I64-NEXT:    vrev64.32 q1, q6
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v4f64:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r11, lr}
+; BE-I32-NEON-NEXT:    push {r11, lr}
+; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; BE-I32-NEON-NEXT:    vorr q4, q1, q1
+; BE-I32-NEON-NEXT:    vorr q5, q0, q0
+; BE-I32-NEON-NEXT:    vorr d0, d8, d8
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d10, d10
+; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d9, d9
+; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d11, d11
+; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT:    vrev64.32 q0, q6
+; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; BE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v4f64:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, lr}
+; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    vorr q4, q1, q1
+; BE-I64-NEON-NEXT:    vorr q5, q0, q0
+; BE-I64-NEON-NEXT:    vorr d0, d9, d9
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d10, d10
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d11, d11
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d8, d8
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r6
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r5
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q0, q7
+; BE-I64-NEON-NEXT:    vrev64.32 q1, q6
+; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, pc}
+  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x)
+  ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)
+
+define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
+; LE-I32-LABEL: lrint_v8f64:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    .pad #32
+; LE-I32-NEXT:    sub sp, sp, #32
+; LE-I32-NEXT:    vorr q5, q0, q0
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    vorr d0, d4, d4
+; LE-I32-NEXT:    vstmia sp, {d6, d7} @ 16-byte Spill
+; LE-I32-NEXT:    vorr q7, q3, q3
+; LE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEXT:    vorr q6, q1, q1
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d14, d14
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d12, d12
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d10, d10
+; LE-I32-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d13, d13
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d11, d11
+; LE-I32-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
+; LE-I32-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    vorr q0, q7, q7
+; LE-I32-NEXT:    vorr q1, q4, q4
+; LE-I32-NEXT:    add sp, sp, #32
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v8f64:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    .pad #40
+; LE-I64-NEXT:    sub sp, sp, #40
+; LE-I64-NEXT:    vorr q4, q0, q0
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vorr d0, d7, d7
+; LE-I64-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I64-NEXT:    vorr q7, q2, q2
+; LE-I64-NEXT:    vorr q6, q1, q1
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d14, d14
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d15, d15
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d12, d12
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d13, d13
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d8, d8
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d9, d9
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    vmov.32 d13[1], r6
+; LE-I64-NEXT:    vldmia lr, {d6, d7} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d15[1], r4
+; LE-I64-NEXT:    vmov.32 d11[1], r10
+; LE-I64-NEXT:    vmov.32 d6[0], r0
+; LE-I64-NEXT:    vmov.32 d12[1], r5
+; LE-I64-NEXT:    vmov.32 d14[1], r7
+; LE-I64-NEXT:    vorr q0, q6, q6
+; LE-I64-NEXT:    vmov.32 d10[1], r9
+; LE-I64-NEXT:    vorr q1, q7, q7
+; LE-I64-NEXT:    vmov.32 d7[1], r8
+; LE-I64-NEXT:    vorr q2, q5, q5
+; LE-I64-NEXT:    vmov.32 d6[1], r1
+; LE-I64-NEXT:    add sp, sp, #40
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v8f64:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r11, lr}
+; LE-I32-NEON-NEXT:    push {r11, lr}
+; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    .pad #32
+; LE-I32-NEON-NEXT:    sub sp, sp, #32
+; LE-I32-NEON-NEXT:    vorr q5, q0, q0
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    vorr d0, d4, d4
+; LE-I32-NEON-NEXT:    vstmia sp, {d6, d7} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    vorr q7, q3, q3
+; LE-I32-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    vorr q6, q1, q1
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d14, d14
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d12, d12
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d10, d10
+; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d13, d13
+; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d11, d11
+; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d17, d17
+; LE-I32-NEON-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d17, d17
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT:    vorr q0, q7, q7
+; LE-I32-NEON-NEXT:    vorr q1, q4, q4
+; LE-I32-NEON-NEXT:    add sp, sp, #32
+; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v8f64:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    .pad #40
+; LE-I64-NEON-NEXT:    sub sp, sp, #40
+; LE-I64-NEON-NEXT:    vorr q4, q0, q0
+; LE-I64-NEON-NEXT:    add lr, sp, #24
+; LE-I64-NEON-NEXT:    vorr d0, d7, d7
+; LE-I64-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    vorr q7, q2, q2
+; LE-I64-NEON-NEXT:    vorr q6, q1, q1
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d14, d14
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT:    mov r8, r1
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d15, d15
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d12, d12
+; LE-I64-NEON-NEXT:    mov r10, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d13, d13
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d8, d8
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d9, d9
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #24
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r6
+; LE-I64-NEON-NEXT:    vldmia lr, {d6, d7} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r4
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r10
+; LE-I64-NEON-NEXT:    vmov.32 d6[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r7
+; LE-I64-NEON-NEXT:    vorr q0, q6, q6
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r9
+; LE-I64-NEON-NEXT:    vorr q1, q7, q7
+; LE-I64-NEON-NEXT:    vmov.32 d7[1], r8
+; LE-I64-NEON-NEXT:    vorr q2, q5, q5
+; LE-I64-NEON-NEXT:    vmov.32 d6[1], r1
+; LE-I64-NEON-NEXT:    add sp, sp, #40
+; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I32-LABEL: lrint_v8f64:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    .pad #32
+; BE-I32-NEXT:    sub sp, sp, #32
+; BE-I32-NEXT:    vorr q5, q0, q0
+; BE-I32-NEXT:    vstmia sp, {d0, d1} @ 16-byte Spill
+; BE-I32-NEXT:    vorr d0, d4, d4
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    vorr q7, q3, q3
+; BE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I32-NEXT:    vorr q6, q1, q1
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d10, d10
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d14, d14
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d12, d12
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d15, d15
+; BE-I32-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d13, d13
+; BE-I32-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q5
+; BE-I32-NEXT:    vrev64.32 q1, q4
+; BE-I32-NEXT:    add sp, sp, #32
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v8f64:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    .pad #40
+; BE-I64-NEXT:    sub sp, sp, #40
+; BE-I64-NEXT:    vorr q4, q0, q0
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    vorr d0, d7, d7
+; BE-I64-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I64-NEXT:    vorr q7, q2, q2
+; BE-I64-NEXT:    vorr q6, q1, q1
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d14, d14
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    vmov.32 d17[0], r0
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d15, d15
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d12, d12
+; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d13, d13
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d8, d8
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d9, d9
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    vmov.32 d13[1], r6
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    vmov.32 d15[1], r4
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d11[1], r10
+; BE-I64-NEXT:    vmov.32 d17[1], r8
+; BE-I64-NEXT:    vmov.32 d12[1], r5
+; BE-I64-NEXT:    vmov.32 d14[1], r7
+; BE-I64-NEXT:    vmov.32 d10[1], r9
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 q0, q6
+; BE-I64-NEXT:    vrev64.32 q1, q7
+; BE-I64-NEXT:    vrev64.32 q2, q5
+; BE-I64-NEXT:    vrev64.32 q3, q8
+; BE-I64-NEXT:    add sp, sp, #40
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v8f64:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r11, lr}
+; BE-I32-NEON-NEXT:    push {r11, lr}
+; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    .pad #32
+; BE-I32-NEON-NEXT:    sub sp, sp, #32
+; BE-I32-NEON-NEXT:    vorr q5, q0, q0
+; BE-I32-NEON-NEXT:    vstmia sp, {d0, d1} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    vorr d0, d4, d4
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    vorr q7, q3, q3
+; BE-I32-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    vorr q6, q1, q1
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d10, d10
+; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d14, d14
+; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d12, d12
+; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d15, d15
+; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d13, d13
+; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d17, d17
+; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d17, d17
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT:    vrev64.32 q0, q5
+; BE-I32-NEON-NEXT:    vrev64.32 q1, q4
+; BE-I32-NEON-NEXT:    add sp, sp, #32
+; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v8f64:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    .pad #40
+; BE-I64-NEON-NEXT:    sub sp, sp, #40
+; BE-I64-NEON-NEXT:    vorr q4, q0, q0
+; BE-I64-NEON-NEXT:    add lr, sp, #24
+; BE-I64-NEON-NEXT:    vorr d0, d7, d7
+; BE-I64-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    vorr q7, q2, q2
+; BE-I64-NEON-NEXT:    vorr q6, q1, q1
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d14, d14
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    vmov.32 d17[0], r0
+; BE-I64-NEON-NEXT:    mov r8, r1
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d15, d15
+; BE-I64-NEON-NEXT:    mov r9, r1
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d12, d12
+; BE-I64-NEON-NEXT:    mov r10, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d13, d13
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d8, d8
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d9, d9
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    add lr, sp, #24
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r6
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r10
+; BE-I64-NEON-NEXT:    vmov.32 d17[1], r8
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r5
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r7
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r9
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q0, q6
+; BE-I64-NEON-NEXT:    vrev64.32 q1, q7
+; BE-I64-NEON-NEXT:    vrev64.32 q2, q5
+; BE-I64-NEON-NEXT:    vrev64.32 q3, q8
+; BE-I64-NEON-NEXT:    add sp, sp, #40
+; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x)
+  ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
+
+define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
+; LE-I32-LABEL: lrint_v16f64:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, r5, r6, lr}
+; LE-I32-NEXT:    push {r4, r5, r6, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    .pad #128
+; LE-I32-NEXT:    sub sp, sp, #128
+; LE-I32-NEXT:    add lr, sp, #80
+; LE-I32-NEXT:    add r0, sp, #240
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    add r0, sp, #208
+; LE-I32-NEXT:    vorr q6, q0, q0
+; LE-I32-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vorr q5, q1, q1
+; LE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    vorr d0, d4, d4
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #112
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    add r0, sp, #224
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #96
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    add r0, sp, #256
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    vld1.64 {d14, d15}, [r0]
+; LE-I32-NEXT:    vstmia sp, {d2, d3} @ 16-byte Spill
+; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d12, d12
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d10, d10
+; LE-I32-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d14, d14
+; LE-I32-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #80
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #112
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEXT:    vmov.32 d15[0], r4
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #96
+; LE-I32-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #80
+; LE-I32-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #96
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #112
+; LE-I32-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEXT:    vorr q0, q6, q6
+; LE-I32-NEXT:    vorr q1, q4, q4
+; LE-I32-NEXT:    vorr q2, q5, q5
+; LE-I32-NEXT:    vorr q3, q7, q7
+; LE-I32-NEXT:    add sp, sp, #128
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    pop {r4, r5, r6, pc}
+;
+; LE-I64-LABEL: lrint_v16f64:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    .pad #4
+; LE-I64-NEXT:    sub sp, sp, #4
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    .pad #176
+; LE-I64-NEXT:    sub sp, sp, #176
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    str r0, [sp, #140] @ 4-byte Spill
+; LE-I64-NEXT:    add r0, sp, #312
+; LE-I64-NEXT:    vorr q6, q2, q2
+; LE-I64-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #96
+; LE-I64-NEXT:    vorr q7, q1, q1
+; LE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #144
+; LE-I64-NEXT:    vorr d0, d1, d1
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT:    add r0, sp, #280
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #80
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT:    add r0, sp, #296
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #120
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT:    add r0, sp, #328
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d14, d14
+; LE-I64-NEXT:    str r1, [sp, #116] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d15, d15
+; LE-I64-NEXT:    str r1, [sp, #76] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d12, d12
+; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    str r1, [sp, #72] @ 4-byte Spill
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d13, d13
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d8, d8
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d9, d9
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #96
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vmov.32 d13[1], r5
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d9, d9
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d8, d8
+; LE-I64-NEXT:    vmov.32 d12[1], r7
+; LE-I64-NEXT:    add lr, sp, #96
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    vmov.32 d15[1], r4
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #144
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d17, d17
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    vmov.32 d14[1], r6
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #80
+; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d11, d11
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    vorr d0, d10, d10
+; LE-I64-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d9[1], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d8[1], r0
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #120
+; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d11, d11
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vorr d0, d10, d10
+; LE-I64-NEXT:    ldr r0, [sp, #116] @ 4-byte Reload
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d9[1], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #144
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEXT:    vmov.32 d8[1], r10
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    vmov.32 d15[1], r6
+; LE-I64-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    vmov.32 d20[0], r0
+; LE-I64-NEXT:    vmov.32 d21[1], r8
+; LE-I64-NEXT:    vmov.32 d20[1], r1
+; LE-I64-NEXT:    ldr r1, [sp, #140] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d13[1], r5
+; LE-I64-NEXT:    mov r0, r1
+; LE-I64-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vmov.32 d14[1], r4
+; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #96
+; LE-I64-NEXT:    vmov.32 d12[1], r7
+; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d17[1], r9
+; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; LE-I64-NEXT:    add r0, r1, #64
+; LE-I64-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEXT:    vmov.32 d16[1], r11
+; LE-I64-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT:    add sp, sp, #176
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    add sp, sp, #4
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v16f64:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r4, r5, r6, lr}
+; LE-I32-NEON-NEXT:    push {r4, r5, r6, lr}
+; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    .pad #128
+; LE-I32-NEON-NEXT:    sub sp, sp, #128
+; LE-I32-NEON-NEXT:    add lr, sp, #80
+; LE-I32-NEON-NEXT:    add r0, sp, #240
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    add r0, sp, #208
+; LE-I32-NEON-NEXT:    vorr q6, q0, q0
+; LE-I32-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    vorr q5, q1, q1
+; LE-I32-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    vorr d0, d4, d4
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #112
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    add r0, sp, #224
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #96
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    add r0, sp, #256
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    vld1.64 {d14, d15}, [r0]
+; LE-I32-NEON-NEXT:    vstmia sp, {d2, d3} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d12, d12
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d10, d10
+; LE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d14, d14
+; LE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    mov r4, r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #80
+; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #112
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEON-NEXT:    vmov.32 d15[0], r4
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d17, d17
+; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #96
+; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d17, d17
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #80
+; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d17, d17
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d17, d17
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #96
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d17, d17
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #112
+; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d17, d17
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d17, d17
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d17, d17
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT:    vorr q0, q6, q6
+; LE-I32-NEON-NEXT:    vorr q1, q4, q4
+; LE-I32-NEON-NEXT:    vorr q2, q5, q5
+; LE-I32-NEON-NEXT:    vorr q3, q7, q7
+; LE-I32-NEON-NEXT:    add sp, sp, #128
+; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    pop {r4, r5, r6, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v16f64:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT:    .pad #4
+; LE-I64-NEON-NEXT:    sub sp, sp, #4
+; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    .pad #176
+; LE-I64-NEON-NEXT:    sub sp, sp, #176
+; LE-I64-NEON-NEXT:    add lr, sp, #40
+; LE-I64-NEON-NEXT:    str r0, [sp, #140] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    add r0, sp, #312
+; LE-I64-NEON-NEXT:    vorr q6, q2, q2
+; LE-I64-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #96
+; LE-I64-NEON-NEXT:    vorr q7, q1, q1
+; LE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #144
+; LE-I64-NEON-NEXT:    vorr d0, d1, d1
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT:    add r0, sp, #280
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #80
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT:    add r0, sp, #296
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #120
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT:    add r0, sp, #328
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #56
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d14, d14
+; LE-I64-NEON-NEXT:    str r1, [sp, #116] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d15, d15
+; LE-I64-NEON-NEXT:    str r1, [sp, #76] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d12, d12
+; LE-I64-NEON-NEXT:    add lr, sp, #160
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    str r1, [sp, #72] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d13, d13
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #40
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d8, d8
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d9, d9
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #96
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #40
+; LE-I64-NEON-NEXT:    mov r10, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r5
+; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #56
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d9, d9
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d8, d8
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r7
+; LE-I64-NEON-NEXT:    add lr, sp, #96
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #24
+; LE-I64-NEON-NEXT:    mov r11, r1
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r4
+; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #144
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d17, d17
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r6
+; LE-I64-NEON-NEXT:    mov r8, r1
+; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #56
+; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #80
+; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d11, d11
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #160
+; LE-I64-NEON-NEXT:    vorr d0, d10, d10
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #160
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #120
+; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d11, d11
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #40
+; LE-I64-NEON-NEXT:    vorr d0, d10, d10
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #116] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #144
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r10
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r6
+; LE-I64-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #24
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #160
+; LE-I64-NEON-NEXT:    vmov.32 d20[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d21[1], r8
+; LE-I64-NEON-NEXT:    vmov.32 d20[1], r1
+; LE-I64-NEON-NEXT:    ldr r1, [sp, #140] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r5
+; LE-I64-NEON-NEXT:    mov r0, r1
+; LE-I64-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #56
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r4
+; LE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #96
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r7
+; LE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d17[1], r9
+; LE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; LE-I64-NEON-NEXT:    add r0, r1, #64
+; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEON-NEXT:    vmov.32 d16[1], r11
+; LE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEON-NEXT:    add sp, sp, #176
+; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    add sp, sp, #4
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v16f64:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, r5, r6, lr}
+; BE-I32-NEXT:    push {r4, r5, r6, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    .pad #128
+; BE-I32-NEXT:    sub sp, sp, #128
+; BE-I32-NEXT:    add lr, sp, #64
+; BE-I32-NEXT:    add r0, sp, #240
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    add r0, sp, #224
+; BE-I32-NEXT:    vorr q6, q3, q3
+; BE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    vorr q5, q1, q1
+; BE-I32-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #32
+; BE-I32-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #80
+; BE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #112
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    add r0, sp, #256
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #96
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    add r0, sp, #208
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #48
+; BE-I32-NEXT:    vld1.64 {d14, d15}, [r0]
+; BE-I32-NEXT:    vstmia sp, {d6, d7} @ 16-byte Spill
+; BE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d10, d10
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d12, d12
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d14, d14
+; BE-I32-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #80
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #64
+; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #112
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEXT:    vmov.32 d14[0], r4
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
+; BE-I32-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #96
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #64
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #32
+; BE-I32-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #96
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #112
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #48
+; BE-I32-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #80
+; BE-I32-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q4
+; BE-I32-NEXT:    vrev64.32 q1, q5
+; BE-I32-NEXT:    vrev64.32 q2, q7
+; BE-I32-NEXT:    vrev64.32 q3, q6
+; BE-I32-NEXT:    add sp, sp, #128
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-I64-LABEL: lrint_v16f64:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    .pad #4
+; BE-I64-NEXT:    sub sp, sp, #4
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    .pad #168
+; BE-I64-NEXT:    sub sp, sp, #168
+; BE-I64-NEXT:    add lr, sp, #64
+; BE-I64-NEXT:    str r0, [sp, #132] @ 4-byte Spill
+; BE-I64-NEXT:    add r0, sp, #304
+; BE-I64-NEXT:    vorr q4, q3, q3
+; BE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #48
+; BE-I64-NEXT:    vorr d0, d1, d1
+; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT:    add r0, sp, #320
+; BE-I64-NEXT:    vorr q6, q2, q2
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #88
+; BE-I64-NEXT:    vorr q7, q1, q1
+; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT:    add r0, sp, #272
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #112
+; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT:    add r0, sp, #288
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d14, d14
+; BE-I64-NEXT:    add lr, sp, #136
+; BE-I64-NEXT:    vmov.32 d17[0], r0
+; BE-I64-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d15, d15
+; BE-I64-NEXT:    str r1, [sp, #84] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d12, d12
+; BE-I64-NEXT:    add lr, sp, #152
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d13, d13
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d8, d8
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d9, d9
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    add lr, sp, #64
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    add lr, sp, #136
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vmov.32 d13[1], r5
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEXT:    vorr d0, d9, d9
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d8, d8
+; BE-I64-NEXT:    vmov.32 d12[1], r7
+; BE-I64-NEXT:    add lr, sp, #64
+; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    mov r11, r1
+; BE-I64-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #48
+; BE-I64-NEXT:    vorr q6, q5, q5
+; BE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEXT:    vorr d0, d9, d9
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d8, d8
+; BE-I64-NEXT:    vmov.32 d12[1], r6
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    add lr, sp, #48
+; BE-I64-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #152
+; BE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #88
+; BE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEXT:    vorr d0, d13, d13
+; BE-I64-NEXT:    vmov.32 d9[1], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; BE-I64-NEXT:    vorr d0, d12, d12
+; BE-I64-NEXT:    add lr, sp, #152
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d8[1], r0
+; BE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    add lr, sp, #136
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #112
+; BE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEXT:    vorr d0, d9, d9
+; BE-I64-NEXT:    vmov.32 d11[1], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d8, d8
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    vmov.32 d10[1], r9
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #48
+; BE-I64-NEXT:    vmov.32 d17[1], r10
+; BE-I64-NEXT:    vmov.32 d16[1], r11
+; BE-I64-NEXT:    vorr q12, q8, q8
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #152
+; BE-I64-NEXT:    vmov.32 d17[1], r8
+; BE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    vmov.32 d13[1], r7
+; BE-I64-NEXT:    vmov.32 d16[1], r6
+; BE-I64-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #64
+; BE-I64-NEXT:    vorr q13, q8, q8
+; BE-I64-NEXT:    vmov.32 d12[1], r1
+; BE-I64-NEXT:    ldr r1, [sp, #132] @ 4-byte Reload
+; BE-I64-NEXT:    vrev64.32 q8, q5
+; BE-I64-NEXT:    mov r0, r1
+; BE-I64-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I64-NEXT:    vrev64.32 q9, q9
+; BE-I64-NEXT:    vrev64.32 q10, q10
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 q11, q11
+; BE-I64-NEXT:    vmov.32 d15[1], r4
+; BE-I64-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 q15, q6
+; BE-I64-NEXT:    vmov.32 d14[1], r5
+; BE-I64-NEXT:    vrev64.32 q12, q12
+; BE-I64-NEXT:    vst1.64 {d22, d23}, [r0:128]
+; BE-I64-NEXT:    add r0, r1, #64
+; BE-I64-NEXT:    vrev64.32 q13, q13
+; BE-I64-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEXT:    vst1.64 {d24, d25}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 q14, q7
+; BE-I64-NEXT:    vst1.64 {d26, d27}, [r0:128]!
+; BE-I64-NEXT:    vst1.64 {d28, d29}, [r0:128]
+; BE-I64-NEXT:    add sp, sp, #168
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    add sp, sp, #4
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v16f64:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r4, r5, r6, lr}
+; BE-I32-NEON-NEXT:    push {r4, r5, r6, lr}
+; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    .pad #128
+; BE-I32-NEON-NEXT:    sub sp, sp, #128
+; BE-I32-NEON-NEXT:    add lr, sp, #64
+; BE-I32-NEON-NEXT:    add r0, sp, #240
+; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT:    add r0, sp, #224
+; BE-I32-NEON-NEXT:    vorr q6, q3, q3
+; BE-I32-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    vorr q5, q1, q1
+; BE-I32-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #32
+; BE-I32-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #80
+; BE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #112
+; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT:    add r0, sp, #256
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #96
+; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT:    add r0, sp, #208
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #48
+; BE-I32-NEON-NEXT:    vld1.64 {d14, d15}, [r0]
+; BE-I32-NEON-NEXT:    vstmia sp, {d6, d7} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d10, d10
+; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d12, d12
+; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d14, d14
+; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #80
+; BE-I32-NEON-NEXT:    mov r4, r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #64
+; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #112
+; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEON-NEXT:    vmov.32 d14[0], r4
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d17, d17
+; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #96
+; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #64
+; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d17, d17
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d17, d17
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #32
+; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d17, d17
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #96
+; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d17, d17
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #112
+; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d17, d17
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #48
+; BE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d17, d17
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #80
+; BE-I32-NEON-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d17, d17
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT:    vrev64.32 q0, q4
+; BE-I32-NEON-NEXT:    vrev64.32 q1, q5
+; BE-I32-NEON-NEXT:    vrev64.32 q2, q7
+; BE-I32-NEON-NEXT:    vrev64.32 q3, q6
+; BE-I32-NEON-NEXT:    add sp, sp, #128
+; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v16f64:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT:    .pad #4
+; BE-I64-NEON-NEXT:    sub sp, sp, #4
+; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    .pad #168
+; BE-I64-NEON-NEXT:    sub sp, sp, #168
+; BE-I64-NEON-NEXT:    add lr, sp, #64
+; BE-I64-NEON-NEXT:    str r0, [sp, #132] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    add r0, sp, #304
+; BE-I64-NEON-NEXT:    vorr q4, q3, q3
+; BE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #48
+; BE-I64-NEON-NEXT:    vorr d0, d1, d1
+; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT:    add r0, sp, #320
+; BE-I64-NEON-NEXT:    vorr q6, q2, q2
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #88
+; BE-I64-NEON-NEXT:    vorr q7, q1, q1
+; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT:    add r0, sp, #272
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #112
+; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT:    add r0, sp, #288
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #24
+; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d14, d14
+; BE-I64-NEON-NEXT:    add lr, sp, #136
+; BE-I64-NEON-NEXT:    vmov.32 d17[0], r0
+; BE-I64-NEON-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d15, d15
+; BE-I64-NEON-NEXT:    str r1, [sp, #84] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d12, d12
+; BE-I64-NEON-NEXT:    add lr, sp, #152
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d13, d13
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d8, d8
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d9, d9
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    add lr, sp, #64
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    add lr, sp, #136
+; BE-I64-NEON-NEXT:    mov r9, r1
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r5
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #24
+; BE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d9, d9
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d8, d8
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r7
+; BE-I64-NEON-NEXT:    add lr, sp, #64
+; BE-I64-NEON-NEXT:    mov r10, r1
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    mov r11, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #48
+; BE-I64-NEON-NEXT:    vorr q6, q5, q5
+; BE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d9, d9
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d8, d8
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r6
+; BE-I64-NEON-NEXT:    add lr, sp, #24
+; BE-I64-NEON-NEXT:    mov r8, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #48
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #152
+; BE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #88
+; BE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d13, d13
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d12, d12
+; BE-I64-NEON-NEXT:    add lr, sp, #152
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    add lr, sp, #136
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #112
+; BE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d9, d9
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d8, d8
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r9
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #48
+; BE-I64-NEON-NEXT:    vmov.32 d17[1], r10
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r11
+; BE-I64-NEON-NEXT:    vorr q12, q8, q8
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #152
+; BE-I64-NEON-NEXT:    vmov.32 d17[1], r8
+; BE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #24
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r7
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r6
+; BE-I64-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #64
+; BE-I64-NEON-NEXT:    vorr q13, q8, q8
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r1
+; BE-I64-NEON-NEXT:    ldr r1, [sp, #132] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q5
+; BE-I64-NEON-NEXT:    mov r0, r1
+; BE-I64-NEON-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 q9, q9
+; BE-I64-NEON-NEXT:    vrev64.32 q10, q10
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; BE-I64-NEON-NEXT:    vrev64.32 q11, q11
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r4
+; BE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; BE-I64-NEON-NEXT:    vrev64.32 q15, q6
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r5
+; BE-I64-NEON-NEXT:    vrev64.32 q12, q12
+; BE-I64-NEON-NEXT:    vst1.64 {d22, d23}, [r0:128]
+; BE-I64-NEON-NEXT:    add r0, r1, #64
+; BE-I64-NEON-NEXT:    vrev64.32 q13, q13
+; BE-I64-NEON-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEON-NEXT:    vst1.64 {d24, d25}, [r0:128]!
+; BE-I64-NEON-NEXT:    vrev64.32 q14, q7
+; BE-I64-NEON-NEXT:    vst1.64 {d26, d27}, [r0:128]!
+; BE-I64-NEON-NEXT:    vst1.64 {d28, d29}, [r0:128]
+; BE-I64-NEON-NEXT:    add sp, sp, #168
+; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    add sp, sp, #4
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double> %x)
+  ret <16 x iXLen> %a
+}
+; Declaration of the vector lrint intrinsic used by @lrint_v16f64 above:
+; rounds each of 16 double lanes to the nearest integer, yielding 16 iXLen
+; results (iXLen is substituted with i32/i64 per RUN-line prefix).
+declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>)
+
+define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
+; LE-I32-LABEL: lrint_v32f64:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, r5, r6, lr}
+; LE-I32-NEXT:    push {r4, r5, r6, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    .pad #160
+; LE-I32-NEXT:    sub sp, sp, #160
+; LE-I32-NEXT:    add lr, sp, #96
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    add r0, sp, #304
+; LE-I32-NEXT:    vorr q6, q3, q3
+; LE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    vorr q5, q1, q1
+; LE-I32-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    vorr d0, d4, d4
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    add r0, sp, #352
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    add r0, sp, #272
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #112
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    add r0, sp, #288
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #80
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    add r0, sp, #336
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #144
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    add r0, sp, #256
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #128
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    add r0, sp, #320
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d12, d12
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d10, d10
+; LE-I32-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d13, d13
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d11, d11
+; LE-I32-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #96
+; LE-I32-NEXT:    vorr q5, q4, q4
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
+; LE-I32-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEXT:    add lr, sp, #96
+; LE-I32-NEXT:    add r0, sp, #416
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d8, d8
+; LE-I32-NEXT:    vstmia sp, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I32-NEXT:    vorr q6, q5, q5
+; LE-I32-NEXT:    vorr d0, d14, d14
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d9, d9
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d15, d15
+; LE-I32-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEXT:    add r0, sp, #400
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vorr q6, q5, q5
+; LE-I32-NEXT:    vldmia sp, {d10, d11} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d10, d10
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d8, d8
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d11, d11
+; LE-I32-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d9, d9
+; LE-I32-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    add r0, sp, #384
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d10, d10
+; LE-I32-NEXT:    vstmia sp, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d8, d8
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d11, d11
+; LE-I32-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d9, d9
+; LE-I32-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr q7, q6, q6
+; LE-I32-NEXT:    vldmia sp, {d10, d11} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d10, d10
+; LE-I32-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEXT:    add r0, sp, #368
+; LE-I32-NEXT:    vld1.64 {d12, d13}, [r0]
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #144
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d11, d11
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d12, d12
+; LE-I32-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #144
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEXT:    add r0, sp, #240
+; LE-I32-NEXT:    vorr d0, d13, d13
+; LE-I32-NEXT:    add lr, sp, #144
+; LE-I32-NEXT:    vld1.64 {d10, d11}, [r0]
+; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT:    vstmia sp, {d10, d11} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d10, d10
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #80
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d12, d12
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #112
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d14, d14
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d13, d13
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #128
+; LE-I32-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d12, d12
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d15, d15
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d13, d13
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
+; LE-I32-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    mov r0, r4
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    add lr, sp, #96
+; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT:    vst1.32 {d8, d9}, [r0:128]!
+; LE-I32-NEXT:    vst1.64 {d10, d11}, [r0:128]
+; LE-I32-NEXT:    add r0, r4, #64
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    add lr, sp, #144
+; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I32-NEXT:    add sp, sp, #160
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    pop {r4, r5, r6, pc}
+;
+; LE-I64-LABEL: lrint_v32f64:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    .pad #4
+; LE-I64-NEXT:    sub sp, sp, #4
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    .pad #208
+; LE-I64-NEXT:    sub sp, sp, #208
+; LE-I64-NEXT:    add lr, sp, #72
+; LE-I64-NEXT:    str r0, [sp, #156] @ 4-byte Spill
+; LE-I64-NEXT:    add r0, sp, #456
+; LE-I64-NEXT:    vorr q4, q0, q0
+; LE-I64-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vorr d0, d7, d7
+; LE-I64-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #104
+; LE-I64-NEXT:    vorr q5, q2, q2
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT:    add r0, sp, #344
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #192
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT:    add r0, sp, #376
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT:    add r0, sp, #360
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #136
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT:    add r0, sp, #440
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #176
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d10, d10
+; LE-I64-NEXT:    str r1, [sp, #120] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d11, d11
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d8, d8
+; LE-I64-NEXT:    add lr, sp, #88
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d9, d9
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d10, d10
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d11, d11
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #72
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d9[1], r7
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #176
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d17, d17
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d8[1], r4
+; LE-I64-NEXT:    add lr, sp, #72
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #104
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d9, d9
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vorr d0, d8, d8
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d11[1], r6
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #176
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEXT:    vmov.32 d10[1], r9
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #88
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #120] @ 4-byte Reload
+; LE-I64-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vmov.32 d19[1], r0
+; LE-I64-NEXT:    add r0, sp, #408
+; LE-I64-NEXT:    ldr r2, [sp, #156] @ 4-byte Reload
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT:    vmov.32 d13[1], r7
+; LE-I64-NEXT:    mov r0, r2
+; LE-I64-NEXT:    vmov.32 d12[1], r1
+; LE-I64-NEXT:    add r1, sp, #488
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r1]
+; LE-I64-NEXT:    add r1, sp, #472
+; LE-I64-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #72
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vmov.32 d21[1], r11
+; LE-I64-NEXT:    vmov.32 d20[1], r10
+; LE-I64-NEXT:    add r10, r2, #192
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r1]
+; LE-I64-NEXT:    add r1, sp, #392
+; LE-I64-NEXT:    vmov.32 d18[1], r5
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #72
+; LE-I64-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r1]
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #104
+; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; LE-I64-NEXT:    add r0, sp, #312
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT:    add r0, sp, #328
+; LE-I64-NEXT:    vmov.32 d15[1], r8
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #120
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT:    add r0, sp, #424
+; LE-I64-NEXT:    vmov.32 d14[1], r4
+; LE-I64-NEXT:    vst1.64 {d12, d13}, [r10:128]!
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #176
+; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEXT:    vst1.64 {d14, d15}, [r10:128]!
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #192
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d17, d17
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #136
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d10, d10
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d11, d11
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d10, d10
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d11, d11
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #192
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    add lr, sp, #192
+; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    vmov.32 d15[1], r4
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #176
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d9, d9
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d8, d8
+; LE-I64-NEXT:    vmov.32 d14[1], r6
+; LE-I64-NEXT:    add lr, sp, #136
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    add lr, sp, #176
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d13[1], r5
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d9, d9
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d8, d8
+; LE-I64-NEXT:    vmov.32 d12[1], r8
+; LE-I64-NEXT:    add lr, sp, #88
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #192
+; LE-I64-NEXT:    str r1, [sp, #24] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d11, d11
+; LE-I64-NEXT:    vmov.32 d9[1], r9
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d10, d10
+; LE-I64-NEXT:    vmov.32 d8[1], r11
+; LE-I64-NEXT:    add lr, sp, #192
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    str r1, [sp, #40] @ 4-byte Spill
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #176
+; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d9, d9
+; LE-I64-NEXT:    vmov.32 d11[1], r4
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d8, d8
+; LE-I64-NEXT:    vmov.32 d10[1], r7
+; LE-I64-NEXT:    add lr, sp, #176
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    vmov.32 d15[1], r5
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #72
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d9, d9
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; LE-I64-NEXT:    vorr d0, d8, d8
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d14[1], r0
+; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    add lr, sp, #72
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #104
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d9, d9
+; LE-I64-NEXT:    vmov.32 d13[1], r6
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; LE-I64-NEXT:    vorr d0, d8, d8
+; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d12[1], r0
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #120
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d9, d9
+; LE-I64-NEXT:    vmov.32 d13[1], r8
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d8, d8
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    vmov.32 d12[1], r11
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #72
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    vmov.32 d17[1], r9
+; LE-I64-NEXT:    vmov.32 d16[1], r7
+; LE-I64-NEXT:    vst1.64 {d12, d13}, [r10:128]!
+; LE-I64-NEXT:    vorr q9, q8, q8
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #136
+; LE-I64-NEXT:    vmov.32 d15[1], r5
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r10:128]
+; LE-I64-NEXT:    vmov.32 d14[1], r1
+; LE-I64-NEXT:    ldr r1, [sp, #156] @ 4-byte Reload
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add r0, r1, #128
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vmov.32 d11[1], r6
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #176
+; LE-I64-NEXT:    vmov.32 d10[1], r4
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #192
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT:    add r0, r1, #64
+; LE-I64-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #88
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT:    add sp, sp, #208
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    add sp, sp, #4
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v32f64:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r4, r5, r6, lr}
+; LE-I32-NEON-NEXT:    push {r4, r5, r6, lr}
+; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    .pad #160
+; LE-I32-NEON-NEXT:    sub sp, sp, #160
+; LE-I32-NEON-NEXT:    add lr, sp, #96
+; LE-I32-NEON-NEXT:    mov r4, r0
+; LE-I32-NEON-NEXT:    add r0, sp, #304
+; LE-I32-NEON-NEXT:    vorr q6, q3, q3
+; LE-I32-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    vorr q5, q1, q1
+; LE-I32-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    vorr d0, d4, d4
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    add r0, sp, #352
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    add r0, sp, #272
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #112
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    add r0, sp, #288
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #80
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    add r0, sp, #336
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #144
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    add r0, sp, #256
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #128
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    add r0, sp, #320
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d12, d12
+; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d10, d10
+; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d13, d13
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d11, d11
+; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #96
+; LE-I32-NEON-NEXT:    vorr q5, q4, q4
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d17, d17
+; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT:    add lr, sp, #96
+; LE-I32-NEON-NEXT:    add r0, sp, #416
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d8, d8
+; LE-I32-NEON-NEXT:    vstmia sp, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr q6, q5, q5
+; LE-I32-NEON-NEXT:    vorr d0, d14, d14
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d9, d9
+; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d15, d15
+; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT:    add r0, sp, #400
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    vorr q6, q5, q5
+; LE-I32-NEON-NEXT:    vldmia sp, {d10, d11} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d10, d10
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d8, d8
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d11, d11
+; LE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d9, d9
+; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    add r0, sp, #384
+; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d10, d10
+; LE-I32-NEON-NEXT:    vstmia sp, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d8, d8
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d11, d11
+; LE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d9, d9
+; LE-I32-NEON-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr q7, q6, q6
+; LE-I32-NEON-NEXT:    vldmia sp, {d10, d11} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d10, d10
+; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT:    add r0, sp, #368
+; LE-I32-NEON-NEXT:    vld1.64 {d12, d13}, [r0]
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #144
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d11, d11
+; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d12, d12
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #144
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d17, d17
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEON-NEXT:    add r0, sp, #240
+; LE-I32-NEON-NEXT:    vorr d0, d13, d13
+; LE-I32-NEON-NEXT:    add lr, sp, #144
+; LE-I32-NEON-NEXT:    vld1.64 {d10, d11}, [r0]
+; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    vstmia sp, {d10, d11} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d10, d10
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #80
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d12, d12
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #112
+; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d14, d14
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d13, d13
+; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #128
+; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d12, d12
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d15, d15
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vorr d0, d13, d13
+; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vorr d0, d17, d17
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT:    bl lrint
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT:    mov r0, r4
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    add lr, sp, #96
+; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT:    vst1.32 {d8, d9}, [r0:128]!
+; LE-I32-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]
+; LE-I32-NEON-NEXT:    add r0, r4, #64
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    add lr, sp, #144
+; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I32-NEON-NEXT:    add sp, sp, #160
+; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    pop {r4, r5, r6, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v32f64:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT:    .pad #4
+; LE-I64-NEON-NEXT:    sub sp, sp, #4
+; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    .pad #208
+; LE-I64-NEON-NEXT:    sub sp, sp, #208
+; LE-I64-NEON-NEXT:    add lr, sp, #72
+; LE-I64-NEON-NEXT:    str r0, [sp, #156] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    add r0, sp, #456
+; LE-I64-NEON-NEXT:    vorr q4, q0, q0
+; LE-I64-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #56
+; LE-I64-NEON-NEXT:    vorr d0, d7, d7
+; LE-I64-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #104
+; LE-I64-NEON-NEXT:    vorr q5, q2, q2
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT:    add r0, sp, #344
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #192
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT:    add r0, sp, #376
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #160
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT:    add r0, sp, #360
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #136
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT:    add r0, sp, #440
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #176
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d10, d10
+; LE-I64-NEON-NEXT:    str r1, [sp, #120] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d11, d11
+; LE-I64-NEON-NEXT:    mov r10, r1
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d8, d8
+; LE-I64-NEON-NEXT:    add lr, sp, #88
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    mov r11, r1
+; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d9, d9
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #40
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #56
+; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d10, d10
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d11, d11
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #72
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #56
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r7
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #176
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d17, d17
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r4
+; LE-I64-NEON-NEXT:    add lr, sp, #72
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #104
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d9, d9
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #40
+; LE-I64-NEON-NEXT:    vorr d0, d8, d8
+; LE-I64-NEON-NEXT:    mov r8, r1
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r6
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #176
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r9
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #88
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #120] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #56
+; LE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #24
+; LE-I64-NEON-NEXT:    vmov.32 d19[1], r0
+; LE-I64-NEON-NEXT:    add r0, sp, #408
+; LE-I64-NEON-NEXT:    ldr r2, [sp, #156] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r7
+; LE-I64-NEON-NEXT:    mov r0, r2
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r1
+; LE-I64-NEON-NEXT:    add r1, sp, #488
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #40
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r1]
+; LE-I64-NEON-NEXT:    add r1, sp, #472
+; LE-I64-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #72
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #56
+; LE-I64-NEON-NEXT:    vmov.32 d21[1], r11
+; LE-I64-NEON-NEXT:    vmov.32 d20[1], r10
+; LE-I64-NEON-NEXT:    add r10, r2, #192
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r1]
+; LE-I64-NEON-NEXT:    add r1, sp, #392
+; LE-I64-NEON-NEXT:    vmov.32 d18[1], r5
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #72
+; LE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r1]
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #104
+; LE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; LE-I64-NEON-NEXT:    add r0, sp, #312
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT:    add r0, sp, #328
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r8
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #120
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT:    add r0, sp, #424
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r4
+; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r10:128]!
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #176
+; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r10:128]!
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #192
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d17, d17
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #136
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d10, d10
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d11, d11
+; LE-I64-NEON-NEXT:    mov r8, r1
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #160
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d10, d10
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d11, d11
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #192
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #192
+; LE-I64-NEON-NEXT:    mov r11, r1
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r4
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #176
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d9, d9
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d8, d8
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r6
+; LE-I64-NEON-NEXT:    add lr, sp, #136
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #176
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r5
+; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #24
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d9, d9
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d8, d8
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r8
+; LE-I64-NEON-NEXT:    add lr, sp, #88
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #192
+; LE-I64-NEON-NEXT:    str r1, [sp, #24] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #40
+; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d11, d11
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r9
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d10, d10
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r11
+; LE-I64-NEON-NEXT:    add lr, sp, #192
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #160
+; LE-I64-NEON-NEXT:    str r1, [sp, #40] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #176
+; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #56
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d9, d9
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d8, d8
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r7
+; LE-I64-NEON-NEXT:    add lr, sp, #176
+; LE-I64-NEON-NEXT:    mov r8, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    mov r11, r1
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r5
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #72
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d9, d9
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d8, d8
+; LE-I64-NEON-NEXT:    add lr, sp, #56
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    add lr, sp, #72
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #160
+; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #104
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d9, d9
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r6
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d8, d8
+; LE-I64-NEON-NEXT:    add lr, sp, #160
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r0
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #120
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vorr d0, d9, d9
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r8
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    vorr d0, d8, d8
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r11
+; LE-I64-NEON-NEXT:    bl lrint
+; LE-I64-NEON-NEXT:    add lr, sp, #72
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #160
+; LE-I64-NEON-NEXT:    vmov.32 d17[1], r9
+; LE-I64-NEON-NEXT:    vmov.32 d16[1], r7
+; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r10:128]!
+; LE-I64-NEON-NEXT:    vorr q9, q8, q8
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #136
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r5
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r1
+; LE-I64-NEON-NEXT:    ldr r1, [sp, #156] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add r0, r1, #128
+; LE-I64-NEON-NEXT:    add lr, sp, #56
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r6
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #176
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r4
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #192
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEON-NEXT:    add r0, r1, #64
+; LE-I64-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #88
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEON-NEXT:    add sp, sp, #208
+; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    add sp, sp, #4
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v32f64:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, r5, r6, lr}
+; BE-I32-NEXT:    push {r4, r5, r6, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    .pad #176
+; BE-I32-NEXT:    sub sp, sp, #176
+; BE-I32-NEXT:    add lr, sp, #128
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    add r0, sp, #336
+; BE-I32-NEXT:    vorr q6, q3, q3
+; BE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #80
+; BE-I32-NEXT:    vorr q5, q1, q1
+; BE-I32-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #32
+; BE-I32-NEXT:    vorr d0, d4, d4
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    add r0, sp, #320
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #160
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    add r0, sp, #432
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #112
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    add r0, sp, #288
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #96
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    add r0, sp, #368
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #48
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    add r0, sp, #416
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #144
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    add r0, sp, #400
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #64
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d12, d12
+; BE-I32-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d10, d10
+; BE-I32-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d13, d13
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d11, d11
+; BE-I32-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #128
+; BE-I32-NEXT:    vorr q5, q4, q4
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEXT:    add lr, sp, #128
+; BE-I32-NEXT:    add r0, sp, #384
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #80
+; BE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d8, d8
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #64
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d14, d14
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d9, d9
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d15, d15
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    add lr, sp, #80
+; BE-I32-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    add r0, sp, #272
+; BE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d10, d10
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    vstmia sp, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #32
+; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d8, d8
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d11, d11
+; BE-I32-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d9, d9
+; BE-I32-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEXT:    add lr, sp, #64
+; BE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vldmia sp, {d10, d11} @ 16-byte Reload
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEXT:    add r0, sp, #256
+; BE-I32-NEXT:    vorr d0, d10, d10
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #160
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d8, d8
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d11, d11
+; BE-I32-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d9, d9
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEXT:    add lr, sp, #32
+; BE-I32-NEXT:    add r0, sp, #304
+; BE-I32-NEXT:    vld1.64 {d10, d11}, [r0]
+; BE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d14, d14
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #48
+; BE-I32-NEXT:    vorr q4, q6, q6
+; BE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d12, d12
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d15, d15
+; BE-I32-NEXT:    add lr, sp, #160
+; BE-I32-NEXT:    vmov.32 d17[0], r0
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d10, d10
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d13, d13
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #160
+; BE-I32-NEXT:    vorr d0, d11, d11
+; BE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT:    add lr, sp, #48
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    add r0, sp, #352
+; BE-I32-NEXT:    vld1.64 {d14, d15}, [r0]
+; BE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d14, d14
+; BE-I32-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    add lr, sp, #160
+; BE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #96
+; BE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d12, d12
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #112
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d14, d14
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d13, d13
+; BE-I32-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #144
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d12, d12
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d15, d15
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d13, d13
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #48
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    add lr, sp, #160
+; BE-I32-NEXT:    vrev64.32 q9, q4
+; BE-I32-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I32-NEXT:    add lr, sp, #80
+; BE-I32-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEXT:    add lr, sp, #128
+; BE-I32-NEXT:    vmov.32 d22[1], r0
+; BE-I32-NEXT:    mov r0, r4
+; BE-I32-NEXT:    vst1.32 {d20, d21}, [r0:128]!
+; BE-I32-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEXT:    add lr, sp, #16
+; BE-I32-NEXT:    vrev64.32 q8, q5
+; BE-I32-NEXT:    vst1.32 {d20, d21}, [r0:128]!
+; BE-I32-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEXT:    add lr, sp, #32
+; BE-I32-NEXT:    vst1.32 {d20, d21}, [r0:128]!
+; BE-I32-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; BE-I32-NEXT:    add r0, r4, #64
+; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT:    add lr, sp, #64
+; BE-I32-NEXT:    vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEXT:    vst1.32 {d22, d23}, [r0:128]!
+; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT:    vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-I32-NEXT:    add sp, sp, #176
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-I64-LABEL: lrint_v32f64:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    .pad #4
+; BE-I64-NEXT:    sub sp, sp, #4
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    .pad #232
+; BE-I64-NEXT:    sub sp, sp, #232
+; BE-I64-NEXT:    add lr, sp, #184
+; BE-I64-NEXT:    str r0, [sp, #148] @ 4-byte Spill
+; BE-I64-NEXT:    add r0, sp, #416
+; BE-I64-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #168
+; BE-I64-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #152
+; BE-I64-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #128
+; BE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #200
+; BE-I64-NEXT:    vld1.64 {d18, d19}, [r0]
+; BE-I64-NEXT:    add r0, sp, #448
+; BE-I64-NEXT:    vorr d0, d19, d19
+; BE-I64-NEXT:    vld1.64 {d14, d15}, [r0]
+; BE-I64-NEXT:    add r0, sp, #336
+; BE-I64-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #64
+; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT:    add r0, sp, #400
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT:    add r0, sp, #352
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT:    add r0, sp, #368
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #48
+; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT:    add r0, sp, #384
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #96
+; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT:    add r0, sp, #512
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #112
+; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEXT:    add r0, sp, #432
+; BE-I64-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d8, d8
+; BE-I64-NEXT:    str r1, [sp, #80] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d9, d9
+; BE-I64-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d14, d14
+; BE-I64-NEXT:    add lr, sp, #216
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d15, d15
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    add lr, sp, #64
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT:    vorr d0, d10, d10
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d11, d11
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    add lr, sp, #200
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    add lr, sp, #200
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d15[1], r7
+; BE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT:    vorr d0, d11, d11
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d10, d10
+; BE-I64-NEXT:    vmov.32 d14[1], r6
+; BE-I64-NEXT:    add lr, sp, #64
+; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    mov r11, r1
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I64-NEXT:    vorr d0, d15, d15
+; BE-I64-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d14, d14
+; BE-I64-NEXT:    vmov.32 d8[1], r8
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #216
+; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #48
+; BE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEXT:    vorr d0, d9, d9
+; BE-I64-NEXT:    vmov.32 d11[1], r9
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; BE-I64-NEXT:    vorr d0, d8, d8
+; BE-I64-NEXT:    add lr, sp, #216
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vmov.32 d10[1], r0
+; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    add lr, sp, #48
+; BE-I64-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #200
+; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #96
+; BE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEXT:    vorr d0, d9, d9
+; BE-I64-NEXT:    vmov.32 d11[1], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d8, d8
+; BE-I64-NEXT:    vmov.32 d10[1], r5
+; BE-I64-NEXT:    add lr, sp, #200
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    add lr, sp, #112
+; BE-I64-NEXT:    vorr q4, q6, q6
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEXT:    vorr d0, d13, d13
+; BE-I64-NEXT:    vmov.32 d9[1], r10
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d12, d12
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    vmov.32 d8[1], r11
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #48
+; BE-I64-NEXT:    vmov.32 d17[1], r0
+; BE-I64-NEXT:    vmov.32 d16[1], r8
+; BE-I64-NEXT:    vorr q9, q8, q8
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #112
+; BE-I64-NEXT:    vmov.32 d17[1], r9
+; BE-I64-NEXT:    vmov.32 d16[1], r6
+; BE-I64-NEXT:    vorr q10, q8, q8
+; BE-I64-NEXT:    vrev64.32 q8, q4
+; BE-I64-NEXT:    vmov.32 d15[1], r7
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #200
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    vmov.32 d11[1], r5
+; BE-I64-NEXT:    vrev64.32 q8, q8
+; BE-I64-NEXT:    vmov.32 d14[1], r4
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #216
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEXT:    vrev64.32 q8, q8
+; BE-I64-NEXT:    vrev64.32 q6, q7
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #96
+; BE-I64-NEXT:    vrev64.32 q7, q5
+; BE-I64-NEXT:    vrev64.32 q8, q8
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #64
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #80
+; BE-I64-NEXT:    vrev64.32 q8, q8
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #64
+; BE-I64-NEXT:    vrev64.32 q8, q9
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #48
+; BE-I64-NEXT:    vrev64.32 q8, q10
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #128
+; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT:    vorr d0, d11, d11
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d10, d10
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    ldr r6, [sp, #148] @ 4-byte Reload
+; BE-I64-NEXT:    add lr, sp, #152
+; BE-I64-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEXT:    mov r5, r6
+; BE-I64-NEXT:    vmov.32 d8[1], r1
+; BE-I64-NEXT:    vrev64.32 q8, q4
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT:    vorr d0, d11, d11
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d10, d10
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    add lr, sp, #168
+; BE-I64-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEXT:    vmov.32 d8[1], r1
+; BE-I64-NEXT:    vrev64.32 q8, q4
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT:    vorr d0, d11, d11
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d10, d10
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    add lr, sp, #184
+; BE-I64-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEXT:    vmov.32 d8[1], r1
+; BE-I64-NEXT:    vrev64.32 q8, q4
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEXT:    vorr d0, d11, d11
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d10, d10
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    add r0, sp, #464
+; BE-I64-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEXT:    vmov.32 d8[1], r1
+; BE-I64-NEXT:    vrev64.32 q8, q4
+; BE-I64-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-I64-NEXT:    vorr d0, d9, d9
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d8, d8
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    add r0, sp, #480
+; BE-I64-NEXT:    add r5, r6, #192
+; BE-I64-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-I64-NEXT:    vorr d0, d9, d9
+; BE-I64-NEXT:    vrev64.32 q8, q5
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d8, d8
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    add r0, sp, #496
+; BE-I64-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-I64-NEXT:    vorr d0, d9, d9
+; BE-I64-NEXT:    vrev64.32 q8, q5
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vorr d0, d8, d8
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    add lr, sp, #112
+; BE-I64-NEXT:    add r0, r6, #128
+; BE-I64-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEXT:    vrev64.32 q8, q5
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT:    vst1.64 {d14, d15}, [r5:128]
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #200
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #216
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #96
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #80
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-I64-NEXT:    add r0, r6, #64
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #64
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #48
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    vst1.64 {d12, d13}, [r0:128]
+; BE-I64-NEXT:    add sp, sp, #232
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    add sp, sp, #4
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v32f64:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r4, r5, r6, lr}
+; BE-I32-NEON-NEXT:    push {r4, r5, r6, lr}
+; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    .pad #176
+; BE-I32-NEON-NEXT:    sub sp, sp, #176
+; BE-I32-NEON-NEXT:    add lr, sp, #128
+; BE-I32-NEON-NEXT:    mov r4, r0
+; BE-I32-NEON-NEXT:    add r0, sp, #336
+; BE-I32-NEON-NEXT:    vorr q6, q3, q3
+; BE-I32-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #80
+; BE-I32-NEON-NEXT:    vorr q5, q1, q1
+; BE-I32-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #32
+; BE-I32-NEON-NEXT:    vorr d0, d4, d4
+; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT:    add r0, sp, #320
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #160
+; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT:    add r0, sp, #432
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #112
+; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT:    add r0, sp, #288
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #96
+; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT:    add r0, sp, #368
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #48
+; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT:    add r0, sp, #416
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #144
+; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT:    add r0, sp, #400
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #64
+; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d12, d12
+; BE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d10, d10
+; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d13, d13
+; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d11, d11
+; BE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #128
+; BE-I32-NEON-NEXT:    vorr q5, q4, q4
+; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d17, d17
+; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT:    add lr, sp, #128
+; BE-I32-NEON-NEXT:    add r0, sp, #384
+; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #80
+; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d8, d8
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #64
+; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d14, d14
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d9, d9
+; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d15, d15
+; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT:    add lr, sp, #80
+; BE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT:    add r0, sp, #272
+; BE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d10, d10
+; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT:    vstmia sp, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #32
+; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d8, d8
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d11, d11
+; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d9, d9
+; BE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT:    add lr, sp, #64
+; BE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vldmia sp, {d10, d11} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT:    add r0, sp, #256
+; BE-I32-NEON-NEXT:    vorr d0, d10, d10
+; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #160
+; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d8, d8
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d11, d11
+; BE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d9, d9
+; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT:    add lr, sp, #32
+; BE-I32-NEON-NEXT:    add r0, sp, #304
+; BE-I32-NEON-NEXT:    vld1.64 {d10, d11}, [r0]
+; BE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d14, d14
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #48
+; BE-I32-NEON-NEXT:    vorr q4, q6, q6
+; BE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d12, d12
+; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d15, d15
+; BE-I32-NEON-NEXT:    add lr, sp, #160
+; BE-I32-NEON-NEXT:    vmov.32 d17[0], r0
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d10, d10
+; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d13, d13
+; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #160
+; BE-I32-NEON-NEXT:    vorr d0, d11, d11
+; BE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    add lr, sp, #48
+; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT:    add r0, sp, #352
+; BE-I32-NEON-NEXT:    vld1.64 {d14, d15}, [r0]
+; BE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d14, d14
+; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT:    add lr, sp, #160
+; BE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #96
+; BE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d12, d12
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #112
+; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d14, d14
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d13, d13
+; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #144
+; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d12, d12
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d15, d15
+; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    vorr d0, d13, d13
+; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #48
+; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vorr d0, d17, d17
+; BE-I32-NEON-NEXT:    bl lrint
+; BE-I32-NEON-NEXT:    add lr, sp, #160
+; BE-I32-NEON-NEXT:    vrev64.32 q9, q4
+; BE-I32-NEON-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    add lr, sp, #80
+; BE-I32-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    add lr, sp, #128
+; BE-I32-NEON-NEXT:    vmov.32 d22[1], r0
+; BE-I32-NEON-NEXT:    mov r0, r4
+; BE-I32-NEON-NEXT:    vst1.32 {d20, d21}, [r0:128]!
+; BE-I32-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    add lr, sp, #16
+; BE-I32-NEON-NEXT:    vrev64.32 q8, q5
+; BE-I32-NEON-NEXT:    vst1.32 {d20, d21}, [r0:128]!
+; BE-I32-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    add lr, sp, #32
+; BE-I32-NEON-NEXT:    vst1.32 {d20, d21}, [r0:128]!
+; BE-I32-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; BE-I32-NEON-NEXT:    add r0, r4, #64
+; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    add lr, sp, #64
+; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEON-NEXT:    vst1.32 {d22, d23}, [r0:128]!
+; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r0:128]!
+; BE-I32-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-I32-NEON-NEXT:    add sp, sp, #176
+; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v32f64:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT:    .pad #4
+; BE-I64-NEON-NEXT:    sub sp, sp, #4
+; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    .pad #232
+; BE-I64-NEON-NEXT:    sub sp, sp, #232
+; BE-I64-NEON-NEXT:    add lr, sp, #184
+; BE-I64-NEON-NEXT:    str r0, [sp, #148] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    add r0, sp, #416
+; BE-I64-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #168
+; BE-I64-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #152
+; BE-I64-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #128
+; BE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #200
+; BE-I64-NEON-NEXT:    vld1.64 {d18, d19}, [r0]
+; BE-I64-NEON-NEXT:    add r0, sp, #448
+; BE-I64-NEON-NEXT:    vorr d0, d19, d19
+; BE-I64-NEON-NEXT:    vld1.64 {d14, d15}, [r0]
+; BE-I64-NEON-NEXT:    add r0, sp, #336
+; BE-I64-NEON-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #64
+; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT:    add r0, sp, #400
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT:    add r0, sp, #352
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #24
+; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT:    add r0, sp, #368
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #48
+; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT:    add r0, sp, #384
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #96
+; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT:    add r0, sp, #512
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #112
+; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I64-NEON-NEXT:    add r0, sp, #432
+; BE-I64-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d8, d8
+; BE-I64-NEON-NEXT:    str r1, [sp, #80] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d9, d9
+; BE-I64-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d14, d14
+; BE-I64-NEON-NEXT:    add lr, sp, #216
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    mov r9, r1
+; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d15, d15
+; BE-I64-NEON-NEXT:    mov r8, r1
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    add lr, sp, #64
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d10, d10
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d11, d11
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    add lr, sp, #200
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #200
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r7
+; BE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d11, d11
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d10, d10
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r6
+; BE-I64-NEON-NEXT:    add lr, sp, #64
+; BE-I64-NEON-NEXT:    mov r10, r1
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    add lr, sp, #24
+; BE-I64-NEON-NEXT:    mov r11, r1
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d15, d15
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d14, d14
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r8
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #24
+; BE-I64-NEON-NEXT:    mov r8, r1
+; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #216
+; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #48
+; BE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d9, d9
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r9
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d8, d8
+; BE-I64-NEON-NEXT:    add lr, sp, #216
+; BE-I64-NEON-NEXT:    mov r9, r1
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #48
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #200
+; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #96
+; BE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d9, d9
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d8, d8
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r5
+; BE-I64-NEON-NEXT:    add lr, sp, #200
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    add lr, sp, #112
+; BE-I64-NEON-NEXT:    vorr q4, q6, q6
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d13, d13
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r10
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d12, d12
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r11
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #24
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #48
+; BE-I64-NEON-NEXT:    vmov.32 d17[1], r0
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r8
+; BE-I64-NEON-NEXT:    vorr q9, q8, q8
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #112
+; BE-I64-NEON-NEXT:    vmov.32 d17[1], r9
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r6
+; BE-I64-NEON-NEXT:    vorr q10, q8, q8
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q4
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r7
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #200
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r5
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q8
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r4
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #216
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q8
+; BE-I64-NEON-NEXT:    vrev64.32 q6, q7
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #8
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #96
+; BE-I64-NEON-NEXT:    vrev64.32 q7, q5
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q8
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #64
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #80
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q8
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #64
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q9
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #48
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q10
+; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEON-NEXT:    add lr, sp, #128
+; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d11, d11
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d10, d10
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    ldr r6, [sp, #148] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #152
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT:    mov r5, r6
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q4
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d11, d11
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d10, d10
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #168
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q4
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d11, d11
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d10, d10
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #184
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q4
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vorr d0, d11, d11
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d10, d10
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    add r0, sp, #464
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q4
+; BE-I64-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-I64-NEON-NEXT:    vorr d0, d9, d9
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d8, d8
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    add r0, sp, #480
+; BE-I64-NEON-NEXT:    add r5, r6, #192
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-I64-NEON-NEXT:    vorr d0, d9, d9
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q5
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d8, d8
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    add r0, sp, #496
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-I64-NEON-NEXT:    vorr d0, d9, d9
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q5
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vorr d0, d8, d8
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    bl lrint
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    add lr, sp, #112
+; BE-I64-NEON-NEXT:    add r0, r6, #128
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 q8, q5
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r5:128]
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #200
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #216
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #96
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #80
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; BE-I64-NEON-NEXT:    add r0, r6, #64
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #64
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    add lr, sp, #48
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]
+; BE-I64-NEON-NEXT:    add sp, sp, #232
+; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    add sp, sp, #4
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double> %x)
+  ret <32 x iXLen> %a
+}
+declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>)
+
+define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
+; LE-I32-LABEL: lrint_v1fp128:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v1fp128:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r11, lr}
+; LE-I64-NEXT:    push {r11, lr}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    vmov.32 d0[0], r0
+; LE-I64-NEXT:    vmov.32 d0[1], r1
+; LE-I64-NEXT:    pop {r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v1fp128:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r11, lr}
+; LE-I32-NEON-NEXT:    push {r11, lr}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v1fp128:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r11, lr}
+; LE-I64-NEON-NEXT:    push {r11, lr}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    vmov.32 d0[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d0[1], r1
+; LE-I64-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I32-LABEL: lrint_v1fp128:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v1fp128:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r11, lr}
+; BE-I64-NEXT:    push {r11, lr}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEXT:    pop {r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v1fp128:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r11, lr}
+; BE-I32-NEON-NEXT:    push {r11, lr}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    pop {r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v1fp128:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r11, lr}
+; BE-I64-NEON-NEXT:    push {r11, lr}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEON-NEXT:    pop {r11, pc}
+  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x)
+  ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>)
+
+define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
+; LE-I32-LABEL: lrint_v2fp128:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; LE-I32-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; LE-I32-NEXT:    mov r8, r3
+; LE-I32-NEXT:    add r3, sp, #24
+; LE-I32-NEXT:    mov r5, r2
+; LE-I32-NEXT:    mov r6, r1
+; LE-I32-NEXT:    mov r7, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    mov r0, r7
+; LE-I32-NEXT:    mov r1, r6
+; LE-I32-NEXT:    mov r2, r5
+; LE-I32-NEXT:    mov r3, r8
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d0[0], r0
+; LE-I32-NEXT:    vmov.32 d0[1], r4
+; LE-I32-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; LE-I64-LABEL: lrint_v2fp128:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; LE-I64-NEXT:    .vsave {d8, d9}
+; LE-I64-NEXT:    vpush {d8, d9}
+; LE-I64-NEXT:    mov r8, r3
+; LE-I64-NEXT:    add r3, sp, #40
+; LE-I64-NEXT:    mov r5, r2
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    mov r7, r0
+; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    mov r0, r7
+; LE-I64-NEXT:    mov r1, r6
+; LE-I64-NEXT:    mov r2, r5
+; LE-I64-NEXT:    mov r3, r8
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vmov.32 d9[1], r4
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vorr q0, q4, q4
+; LE-I64-NEXT:    vpop {d8, d9}
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v2fp128:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; LE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; LE-I32-NEON-NEXT:    mov r8, r3
+; LE-I32-NEON-NEXT:    add r3, sp, #24
+; LE-I32-NEON-NEXT:    mov r5, r2
+; LE-I32-NEON-NEXT:    mov r6, r1
+; LE-I32-NEON-NEXT:    mov r7, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    mov r4, r0
+; LE-I32-NEON-NEXT:    mov r0, r7
+; LE-I32-NEON-NEXT:    mov r1, r6
+; LE-I32-NEON-NEXT:    mov r2, r5
+; LE-I32-NEON-NEXT:    mov r3, r8
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d0[0], r0
+; LE-I32-NEON-NEXT:    vmov.32 d0[1], r4
+; LE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v2fp128:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; LE-I64-NEON-NEXT:    .vsave {d8, d9}
+; LE-I64-NEON-NEXT:    vpush {d8, d9}
+; LE-I64-NEON-NEXT:    mov r8, r3
+; LE-I64-NEON-NEXT:    add r3, sp, #40
+; LE-I64-NEON-NEXT:    mov r5, r2
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    mov r7, r0
+; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r7
+; LE-I64-NEON-NEXT:    mov r1, r6
+; LE-I64-NEON-NEXT:    mov r2, r5
+; LE-I64-NEON-NEXT:    mov r3, r8
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r4
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT:    vorr q0, q4, q4
+; LE-I64-NEON-NEXT:    vpop {d8, d9}
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; BE-I32-LABEL: lrint_v2fp128:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; BE-I32-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; BE-I32-NEXT:    mov r8, r3
+; BE-I32-NEXT:    add r3, sp, #24
+; BE-I32-NEXT:    mov r5, r2
+; BE-I32-NEXT:    mov r6, r1
+; BE-I32-NEXT:    mov r7, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    mov r0, r7
+; BE-I32-NEXT:    mov r1, r6
+; BE-I32-NEXT:    mov r2, r5
+; BE-I32-NEXT:    mov r3, r8
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d16[0], r0
+; BE-I32-NEXT:    vmov.32 d16[1], r4
+; BE-I32-NEXT:    vrev64.32 d0, d16
+; BE-I32-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; BE-I64-LABEL: lrint_v2fp128:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; BE-I64-NEXT:    .vsave {d8}
+; BE-I64-NEXT:    vpush {d8}
+; BE-I64-NEXT:    mov r8, r3
+; BE-I64-NEXT:    add r3, sp, #32
+; BE-I64-NEXT:    mov r5, r2
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    mov r7, r0
+; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    mov r0, r7
+; BE-I64-NEXT:    mov r1, r6
+; BE-I64-NEXT:    mov r2, r5
+; BE-I64-NEXT:    mov r3, r8
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d8[1], r4
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d1, d8
+; BE-I64-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEXT:    vpop {d8}
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v2fp128:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; BE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; BE-I32-NEON-NEXT:    mov r8, r3
+; BE-I32-NEON-NEXT:    add r3, sp, #24
+; BE-I32-NEON-NEXT:    mov r5, r2
+; BE-I32-NEON-NEXT:    mov r6, r1
+; BE-I32-NEON-NEXT:    mov r7, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    mov r4, r0
+; BE-I32-NEON-NEXT:    mov r0, r7
+; BE-I32-NEON-NEXT:    mov r1, r6
+; BE-I32-NEON-NEXT:    mov r2, r5
+; BE-I32-NEON-NEXT:    mov r3, r8
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I32-NEON-NEXT:    vmov.32 d16[1], r4
+; BE-I32-NEON-NEXT:    vrev64.32 d0, d16
+; BE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v2fp128:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; BE-I64-NEON-NEXT:    .vsave {d8}
+; BE-I64-NEON-NEXT:    vpush {d8}
+; BE-I64-NEON-NEXT:    mov r8, r3
+; BE-I64-NEON-NEXT:    add r3, sp, #32
+; BE-I64-NEON-NEXT:    mov r5, r2
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    mov r7, r0
+; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r7
+; BE-I64-NEON-NEXT:    mov r1, r6
+; BE-I64-NEON-NEXT:    mov r2, r5
+; BE-I64-NEON-NEXT:    mov r3, r8
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 d1, d8
+; BE-I64-NEON-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEON-NEXT:    vpop {d8}
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x)
+  ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>)
+
+define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
+; LE-I32-LABEL: lrint_v4fp128:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, lr}
+; LE-I32-NEXT:    push {r4, lr}
+; LE-I32-NEXT:    .vsave {d8, d9}
+; LE-I32-NEXT:    vpush {d8, d9}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #60
+; LE-I32-NEXT:    ldr r12, [sp, #56]
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEXT:    mov r0, r12
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #40
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #28
+; LE-I32-NEXT:    ldr r12, [sp, #24]
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEXT:    mov r0, r12
+; LE-I32-NEXT:    vmov.32 d9[1], r4
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    vorr q0, q4, q4
+; LE-I32-NEXT:    vpop {d8, d9}
+; LE-I32-NEXT:    pop {r4, pc}
+;
+; LE-I64-LABEL: lrint_v4fp128:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11}
+; LE-I64-NEXT:    mov r5, r3
+; LE-I64-NEXT:    add r3, sp, #96
+; LE-I64-NEXT:    mov r7, r2
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    mov r4, r0
+; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    mov r0, r4
+; LE-I64-NEXT:    mov r1, r6
+; LE-I64-NEXT:    mov r2, r7
+; LE-I64-NEXT:    mov r3, r5
+; LE-I64-NEXT:    ldr r8, [sp, #80]
+; LE-I64-NEXT:    ldr r10, [sp, #64]
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #68
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    mov r0, r10
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #84
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    mov r0, r8
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vmov.32 d11[1], r4
+; LE-I64-NEXT:    vmov.32 d9[1], r9
+; LE-I64-NEXT:    vmov.32 d10[1], r5
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vorr q0, q5, q5
+; LE-I64-NEXT:    vorr q1, q4, q4
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11}
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v4fp128:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r4, lr}
+; LE-I32-NEON-NEXT:    push {r4, lr}
+; LE-I32-NEON-NEXT:    .vsave {d8, d9}
+; LE-I32-NEON-NEXT:    vpush {d8, d9}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #60
+; LE-I32-NEON-NEXT:    ldr r12, [sp, #56]
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT:    mov r0, r12
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #40
+; LE-I32-NEON-NEXT:    mov r4, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #28
+; LE-I32-NEON-NEXT:    ldr r12, [sp, #24]
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT:    mov r0, r12
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r4
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT:    vorr q0, q4, q4
+; LE-I32-NEON-NEXT:    vpop {d8, d9}
+; LE-I32-NEON-NEXT:    pop {r4, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v4fp128:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11}
+; LE-I64-NEON-NEXT:    mov r5, r3
+; LE-I64-NEON-NEXT:    add r3, sp, #96
+; LE-I64-NEON-NEXT:    mov r7, r2
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    mov r4, r0
+; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r4
+; LE-I64-NEON-NEXT:    mov r1, r6
+; LE-I64-NEON-NEXT:    mov r2, r7
+; LE-I64-NEON-NEXT:    mov r3, r5
+; LE-I64-NEON-NEXT:    ldr r8, [sp, #80]
+; LE-I64-NEON-NEXT:    ldr r10, [sp, #64]
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #68
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r10
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #84
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r8
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r9
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r5
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT:    vorr q0, q5, q5
+; LE-I64-NEON-NEXT:    vorr q1, q4, q4
+; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11}
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I32-LABEL: lrint_v4fp128:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, lr}
+; BE-I32-NEXT:    push {r4, lr}
+; BE-I32-NEXT:    .vsave {d8, d9}
+; BE-I32-NEXT:    vpush {d8, d9}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #60
+; BE-I32-NEXT:    ldr r12, [sp, #56]
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEXT:    mov r0, r12
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #40
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #28
+; BE-I32-NEXT:    ldr r12, [sp, #24]
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEXT:    mov r0, r12
+; BE-I32-NEXT:    vmov.32 d9[1], r4
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q4
+; BE-I32-NEXT:    vpop {d8, d9}
+; BE-I32-NEXT:    pop {r4, pc}
+;
+; BE-I64-LABEL: lrint_v4fp128:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEXT:    .vsave {d8, d9, d10}
+; BE-I64-NEXT:    vpush {d8, d9, d10}
+; BE-I64-NEXT:    mov r5, r3
+; BE-I64-NEXT:    add r3, sp, #88
+; BE-I64-NEXT:    mov r7, r2
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    mov r4, r0
+; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    mov r0, r4
+; BE-I64-NEXT:    mov r1, r6
+; BE-I64-NEXT:    mov r2, r7
+; BE-I64-NEXT:    mov r3, r5
+; BE-I64-NEXT:    ldr r8, [sp, #72]
+; BE-I64-NEXT:    ldr r10, [sp, #56]
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #60
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    mov r0, r10
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #76
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    mov r0, r8
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d10[1], r4
+; BE-I64-NEXT:    vmov.32 d8[1], r9
+; BE-I64-NEXT:    vmov.32 d9[1], r5
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d1, d10
+; BE-I64-NEXT:    vrev64.32 d3, d8
+; BE-I64-NEXT:    vrev64.32 d0, d9
+; BE-I64-NEXT:    vrev64.32 d2, d16
+; BE-I64-NEXT:    vpop {d8, d9, d10}
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v4fp128:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r4, lr}
+; BE-I32-NEON-NEXT:    push {r4, lr}
+; BE-I32-NEON-NEXT:    .vsave {d8, d9}
+; BE-I32-NEON-NEXT:    vpush {d8, d9}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #60
+; BE-I32-NEON-NEXT:    ldr r12, [sp, #56]
+; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT:    mov r0, r12
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #40
+; BE-I32-NEON-NEXT:    mov r4, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #28
+; BE-I32-NEON-NEXT:    ldr r12, [sp, #24]
+; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT:    mov r0, r12
+; BE-I32-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT:    vrev64.32 q0, q4
+; BE-I32-NEON-NEXT:    vpop {d8, d9}
+; BE-I32-NEON-NEXT:    pop {r4, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v4fp128:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10}
+; BE-I64-NEON-NEXT:    vpush {d8, d9, d10}
+; BE-I64-NEON-NEXT:    mov r5, r3
+; BE-I64-NEON-NEXT:    add r3, sp, #88
+; BE-I64-NEON-NEXT:    mov r7, r2
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    mov r4, r0
+; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    mov r9, r1
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r4
+; BE-I64-NEON-NEXT:    mov r1, r6
+; BE-I64-NEON-NEXT:    mov r2, r7
+; BE-I64-NEON-NEXT:    mov r3, r5
+; BE-I64-NEON-NEXT:    ldr r8, [sp, #72]
+; BE-I64-NEON-NEXT:    ldr r10, [sp, #56]
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #60
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r10
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #76
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r8
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r9
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r5
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 d1, d10
+; BE-I64-NEON-NEXT:    vrev64.32 d3, d8
+; BE-I64-NEON-NEXT:    vrev64.32 d0, d9
+; BE-I64-NEON-NEXT:    vrev64.32 d2, d16
+; BE-I64-NEON-NEXT:    vpop {d8, d9, d10}
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x)
+  ret <4 x iXLen> %a
+}
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>)
+
+define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
+; LE-I32-LABEL: lrint_v8fp128:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11}
+; LE-I32-NEXT:    mov r6, r3
+; LE-I32-NEXT:    add r3, sp, #112
+; LE-I32-NEXT:    mov r7, r2
+; LE-I32-NEXT:    mov r4, r1
+; LE-I32-NEXT:    mov r5, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    mov r0, r5
+; LE-I32-NEXT:    mov r1, r4
+; LE-I32-NEXT:    mov r2, r7
+; LE-I32-NEXT:    mov r3, r6
+; LE-I32-NEXT:    ldr r8, [sp, #160]
+; LE-I32-NEXT:    ldr r9, [sp, #64]
+; LE-I32-NEXT:    ldr r10, [sp, #80]
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #84
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    mov r0, r10
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r6, [sp, #96]
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    ldr r1, [sp, #100]
+; LE-I32-NEXT:    ldr r2, [sp, #104]
+; LE-I32-NEXT:    ldr r3, [sp, #108]
+; LE-I32-NEXT:    mov r0, r6
+; LE-I32-NEXT:    ldr r4, [sp, #68]
+; LE-I32-NEXT:    ldr r5, [sp, #72]
+; LE-I32-NEXT:    ldr r10, [sp, #164]
+; LE-I32-NEXT:    ldr r7, [sp, #168]
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r3, [sp, #76]
+; LE-I32-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEXT:    mov r0, r9
+; LE-I32-NEXT:    mov r1, r4
+; LE-I32-NEXT:    mov r2, r5
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r3, [sp, #172]
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    mov r0, r8
+; LE-I32-NEXT:    mov r1, r10
+; LE-I32-NEXT:    mov r2, r7
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #144
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #132
+; LE-I32-NEXT:    ldr r7, [sp, #128]
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEXT:    mov r0, r7
+; LE-I32-NEXT:    vmov.32 d9[1], r4
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    vorr q0, q5, q5
+; LE-I32-NEXT:    vorr q1, q4, q4
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11}
+; LE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I64-LABEL: lrint_v8fp128:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    .pad #4
+; LE-I64-NEXT:    sub sp, sp, #4
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    .pad #8
+; LE-I64-NEXT:    sub sp, sp, #8
+; LE-I64-NEXT:    mov r11, r3
+; LE-I64-NEXT:    add r3, sp, #208
+; LE-I64-NEXT:    mov r10, r2
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    mov r5, r0
+; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r7, sp, #164
+; LE-I64-NEXT:    ldr r6, [sp, #160]
+; LE-I64-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    ldm r7, {r1, r2, r3, r7}
+; LE-I64-NEXT:    mov r0, r6
+; LE-I64-NEXT:    ldr r8, [sp, #128]
+; LE-I64-NEXT:    ldr r9, [sp, #144]
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #180
+; LE-I64-NEXT:    str r1, [sp] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    mov r0, r7
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #132
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    mov r0, r8
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #148
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    mov r0, r9
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    mov r0, r5
+; LE-I64-NEXT:    mov r1, r4
+; LE-I64-NEXT:    mov r2, r10
+; LE-I64-NEXT:    mov r3, r11
+; LE-I64-NEXT:    ldr r6, [sp, #112]
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #116
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    mov r0, r6
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #196
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #192]
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d11[1], r7
+; LE-I64-NEXT:    vmov.32 d10[1], r0
+; LE-I64-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d15[1], r5
+; LE-I64-NEXT:    vorr q2, q5, q5
+; LE-I64-NEXT:    vmov.32 d13[1], r9
+; LE-I64-NEXT:    vmov.32 d9[1], r0
+; LE-I64-NEXT:    vmov.32 d14[1], r4
+; LE-I64-NEXT:    vmov.32 d12[1], r8
+; LE-I64-NEXT:    vorr q0, q7, q7
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vorr q1, q6, q6
+; LE-I64-NEXT:    vorr q3, q4, q4
+; LE-I64-NEXT:    add sp, sp, #8
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    add sp, sp, #4
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v8fp128:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11}
+; LE-I32-NEON-NEXT:    mov r6, r3
+; LE-I32-NEON-NEXT:    add r3, sp, #112
+; LE-I32-NEON-NEXT:    mov r7, r2
+; LE-I32-NEON-NEXT:    mov r4, r1
+; LE-I32-NEON-NEXT:    mov r5, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    mov r0, r5
+; LE-I32-NEON-NEXT:    mov r1, r4
+; LE-I32-NEON-NEXT:    mov r2, r7
+; LE-I32-NEON-NEXT:    mov r3, r6
+; LE-I32-NEON-NEXT:    ldr r8, [sp, #160]
+; LE-I32-NEON-NEXT:    ldr r9, [sp, #64]
+; LE-I32-NEON-NEXT:    ldr r10, [sp, #80]
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #84
+; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT:    mov r0, r10
+; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r6, [sp, #96]
+; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #100]
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #104]
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #108]
+; LE-I32-NEON-NEXT:    mov r0, r6
+; LE-I32-NEON-NEXT:    ldr r4, [sp, #68]
+; LE-I32-NEON-NEXT:    ldr r5, [sp, #72]
+; LE-I32-NEON-NEXT:    ldr r10, [sp, #164]
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #168]
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #76]
+; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT:    mov r0, r9
+; LE-I32-NEON-NEXT:    mov r1, r4
+; LE-I32-NEON-NEXT:    mov r2, r5
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #172]
+; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT:    mov r0, r8
+; LE-I32-NEON-NEXT:    mov r1, r10
+; LE-I32-NEON-NEXT:    mov r2, r7
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #144
+; LE-I32-NEON-NEXT:    mov r4, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #132
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #128]
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT:    mov r0, r7
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r4
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT:    vorr q0, q5, q5
+; LE-I32-NEON-NEXT:    vorr q1, q4, q4
+; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11}
+; LE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v8fp128:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT:    .pad #4
+; LE-I64-NEON-NEXT:    sub sp, sp, #4
+; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    .pad #8
+; LE-I64-NEON-NEXT:    sub sp, sp, #8
+; LE-I64-NEON-NEXT:    mov r11, r3
+; LE-I64-NEON-NEXT:    add r3, sp, #208
+; LE-I64-NEON-NEXT:    mov r10, r2
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    mov r5, r0
+; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r7, sp, #164
+; LE-I64-NEON-NEXT:    ldr r6, [sp, #160]
+; LE-I64-NEON-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    ldm r7, {r1, r2, r3, r7}
+; LE-I64-NEON-NEXT:    mov r0, r6
+; LE-I64-NEON-NEXT:    ldr r8, [sp, #128]
+; LE-I64-NEON-NEXT:    ldr r9, [sp, #144]
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #180
+; LE-I64-NEON-NEXT:    str r1, [sp] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r7
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #132
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r8
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #148
+; LE-I64-NEON-NEXT:    mov r8, r1
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r9
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r5
+; LE-I64-NEON-NEXT:    mov r1, r4
+; LE-I64-NEON-NEXT:    mov r2, r10
+; LE-I64-NEON-NEXT:    mov r3, r11
+; LE-I64-NEON-NEXT:    ldr r6, [sp, #112]
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #116
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r6
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #196
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #192]
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r7
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r5
+; LE-I64-NEON-NEXT:    vorr q2, q5, q5
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r9
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r4
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r8
+; LE-I64-NEON-NEXT:    vorr q0, q7, q7
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT:    vorr q1, q6, q6
+; LE-I64-NEON-NEXT:    vorr q3, q4, q4
+; LE-I64-NEON-NEXT:    add sp, sp, #8
+; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    add sp, sp, #4
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v8fp128:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEXT:    .pad #4
+; BE-I32-NEXT:    sub sp, sp, #4
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11}
+; BE-I32-NEXT:    .pad #8
+; BE-I32-NEXT:    sub sp, sp, #8
+; BE-I32-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; BE-I32-NEXT:    add r3, sp, #128
+; BE-I32-NEXT:    mov r11, r2
+; BE-I32-NEXT:    mov r6, r1
+; BE-I32-NEXT:    mov r7, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #100
+; BE-I32-NEXT:    ldr r5, [sp, #96]
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    ldr r4, [sp, #160]
+; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEXT:    mov r0, r5
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #164
+; BE-I32-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEXT:    mov r0, r4
+; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r4, [sp, #176]
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    ldr r1, [sp, #180]
+; BE-I32-NEXT:    ldr r2, [sp, #184]
+; BE-I32-NEXT:    ldr r3, [sp, #188]
+; BE-I32-NEXT:    mov r0, r4
+; BE-I32-NEXT:    ldr r5, [sp, #116]
+; BE-I32-NEXT:    ldr r8, [sp, #120]
+; BE-I32-NEXT:    ldr r10, [sp, #84]
+; BE-I32-NEXT:    ldr r9, [sp, #88]
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEXT:    ldr r3, [sp, #124]
+; BE-I32-NEXT:    ldr r0, [sp, #112]
+; BE-I32-NEXT:    mov r1, r5
+; BE-I32-NEXT:    mov r2, r8
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    ldr r3, [sp, #92]
+; BE-I32-NEXT:    ldr r0, [sp, #80]
+; BE-I32-NEXT:    mov r1, r10
+; BE-I32-NEXT:    mov r2, r9
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    mov r0, r7
+; BE-I32-NEXT:    mov r1, r6
+; BE-I32-NEXT:    mov r2, r11
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #148
+; BE-I32-NEXT:    ldr r7, [sp, #144]
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEXT:    mov r0, r7
+; BE-I32-NEXT:    vmov.32 d10[1], r4
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q5
+; BE-I32-NEXT:    vrev64.32 q1, q4
+; BE-I32-NEXT:    add sp, sp, #8
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11}
+; BE-I32-NEXT:    add sp, sp, #4
+; BE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I64-LABEL: lrint_v8fp128:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    .pad #4
+; BE-I64-NEXT:    sub sp, sp, #4
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT:    .pad #16
+; BE-I64-NEXT:    sub sp, sp, #16
+; BE-I64-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; BE-I64-NEXT:    add r3, sp, #208
+; BE-I64-NEXT:    mov r11, r2
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    mov r5, r0
+; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    ldr r7, [sp, #176]
+; BE-I64-NEXT:    add r3, sp, #180
+; BE-I64-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    mov r0, r7
+; BE-I64-NEXT:    ldr r6, [sp, #128]
+; BE-I64-NEXT:    ldr r8, [sp, #144]
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #132
+; BE-I64-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    mov r0, r6
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #148
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    mov r0, r8
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #160
+; BE-I64-NEXT:    mov r9, r0
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    mov r0, r5
+; BE-I64-NEXT:    mov r1, r4
+; BE-I64-NEXT:    mov r2, r11
+; BE-I64-NEXT:    ldr r10, [sp, #112]
+; BE-I64-NEXT:    vmov.32 d12[0], r9
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #116
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    mov r0, r10
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #196
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #192]
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; BE-I64-NEXT:    vmov.32 d14[1], r5
+; BE-I64-NEXT:    vmov.32 d9[1], r0
+; BE-I64-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; BE-I64-NEXT:    vmov.32 d12[1], r7
+; BE-I64-NEXT:    vmov.32 d8[1], r0
+; BE-I64-NEXT:    vmov.32 d13[1], r4
+; BE-I64-NEXT:    vmov.32 d10[1], r6
+; BE-I64-NEXT:    vmov.32 d11[1], r8
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d1, d14
+; BE-I64-NEXT:    vrev64.32 d3, d12
+; BE-I64-NEXT:    vrev64.32 d5, d9
+; BE-I64-NEXT:    vrev64.32 d7, d8
+; BE-I64-NEXT:    vrev64.32 d0, d13
+; BE-I64-NEXT:    vrev64.32 d2, d10
+; BE-I64-NEXT:    vrev64.32 d4, d11
+; BE-I64-NEXT:    vrev64.32 d6, d16
+; BE-I64-NEXT:    add sp, sp, #16
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT:    add sp, sp, #4
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v8fp128:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEON-NEXT:    .pad #4
+; BE-I32-NEON-NEXT:    sub sp, sp, #4
+; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11}
+; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11}
+; BE-I32-NEON-NEXT:    .pad #8
+; BE-I32-NEON-NEXT:    sub sp, sp, #8
+; BE-I32-NEON-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; BE-I32-NEON-NEXT:    add r3, sp, #128
+; BE-I32-NEON-NEXT:    mov r11, r2
+; BE-I32-NEON-NEXT:    mov r6, r1
+; BE-I32-NEON-NEXT:    mov r7, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #100
+; BE-I32-NEON-NEXT:    ldr r5, [sp, #96]
+; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT:    ldr r4, [sp, #160]
+; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT:    mov r0, r5
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #164
+; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT:    mov r0, r4
+; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r4, [sp, #176]
+; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #180]
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #184]
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #188]
+; BE-I32-NEON-NEXT:    mov r0, r4
+; BE-I32-NEON-NEXT:    ldr r5, [sp, #116]
+; BE-I32-NEON-NEXT:    ldr r8, [sp, #120]
+; BE-I32-NEON-NEXT:    ldr r10, [sp, #84]
+; BE-I32-NEON-NEXT:    ldr r9, [sp, #88]
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #124]
+; BE-I32-NEON-NEXT:    ldr r0, [sp, #112]
+; BE-I32-NEON-NEXT:    mov r1, r5
+; BE-I32-NEON-NEXT:    mov r2, r8
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #92]
+; BE-I32-NEON-NEXT:    ldr r0, [sp, #80]
+; BE-I32-NEON-NEXT:    mov r1, r10
+; BE-I32-NEON-NEXT:    mov r2, r9
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; BE-I32-NEON-NEXT:    mov r4, r0
+; BE-I32-NEON-NEXT:    mov r0, r7
+; BE-I32-NEON-NEXT:    mov r1, r6
+; BE-I32-NEON-NEXT:    mov r2, r11
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #148
+; BE-I32-NEON-NEXT:    ldr r7, [sp, #144]
+; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT:    mov r0, r7
+; BE-I32-NEON-NEXT:    vmov.32 d10[1], r4
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT:    vrev64.32 q0, q5
+; BE-I32-NEON-NEXT:    vrev64.32 q1, q4
+; BE-I32-NEON-NEXT:    add sp, sp, #8
+; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11}
+; BE-I32-NEON-NEXT:    add sp, sp, #4
+; BE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v8fp128:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT:    .pad #4
+; BE-I64-NEON-NEXT:    sub sp, sp, #4
+; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEON-NEXT:    .pad #16
+; BE-I64-NEON-NEXT:    sub sp, sp, #16
+; BE-I64-NEON-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    add r3, sp, #208
+; BE-I64-NEON-NEXT:    mov r11, r2
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    mov r5, r0
+; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    ldr r7, [sp, #176]
+; BE-I64-NEON-NEXT:    add r3, sp, #180
+; BE-I64-NEON-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    mov r0, r7
+; BE-I64-NEON-NEXT:    ldr r6, [sp, #128]
+; BE-I64-NEON-NEXT:    ldr r8, [sp, #144]
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #132
+; BE-I64-NEON-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r6
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #148
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r8
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #160
+; BE-I64-NEON-NEXT:    mov r9, r0
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    mov r8, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r5
+; BE-I64-NEON-NEXT:    mov r1, r4
+; BE-I64-NEON-NEXT:    mov r2, r11
+; BE-I64-NEON-NEXT:    ldr r10, [sp, #112]
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r9
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #116
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r10
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #196
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #192]
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r5
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r7
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r4
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r6
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r8
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 d1, d14
+; BE-I64-NEON-NEXT:    vrev64.32 d3, d12
+; BE-I64-NEON-NEXT:    vrev64.32 d5, d9
+; BE-I64-NEON-NEXT:    vrev64.32 d7, d8
+; BE-I64-NEON-NEXT:    vrev64.32 d0, d13
+; BE-I64-NEON-NEXT:    vrev64.32 d2, d10
+; BE-I64-NEON-NEXT:    vrev64.32 d4, d11
+; BE-I64-NEON-NEXT:    vrev64.32 d6, d16
+; BE-I64-NEON-NEXT:    add sp, sp, #16
+; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEON-NEXT:    add sp, sp, #4
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x)
+  ret <8 x iXLen> %a
+}
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>)
+
+define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) {
+; LE-I32-LABEL: lrint_v16fp128:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEXT:    .pad #4
+; LE-I32-NEXT:    sub sp, sp, #4
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    mov r8, r3
+; LE-I32-NEXT:    add r3, sp, #280
+; LE-I32-NEXT:    mov r9, r2
+; LE-I32-NEXT:    mov r10, r1
+; LE-I32-NEXT:    mov r6, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r4, [sp, #216]
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    ldr r1, [sp, #220]
+; LE-I32-NEXT:    ldr r2, [sp, #224]
+; LE-I32-NEXT:    ldr r3, [sp, #228]
+; LE-I32-NEXT:    mov r0, r4
+; LE-I32-NEXT:    ldr r7, [sp, #152]
+; LE-I32-NEXT:    ldr r11, [sp, #104]
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #156
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    mov r0, r7
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r7, [sp, #184]
+; LE-I32-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEXT:    ldr r1, [sp, #188]
+; LE-I32-NEXT:    ldr r2, [sp, #192]
+; LE-I32-NEXT:    ldr r3, [sp, #196]
+; LE-I32-NEXT:    mov r0, r7
+; LE-I32-NEXT:    ldr r4, [sp, #120]
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #124
+; LE-I32-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEXT:    mov r0, r4
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r5, [sp, #136]
+; LE-I32-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEXT:    ldr r1, [sp, #140]
+; LE-I32-NEXT:    ldr r2, [sp, #144]
+; LE-I32-NEXT:    ldr r3, [sp, #148]
+; LE-I32-NEXT:    mov r0, r5
+; LE-I32-NEXT:    ldr r4, [sp, #108]
+; LE-I32-NEXT:    ldr r7, [sp, #112]
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r3, [sp, #116]
+; LE-I32-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEXT:    mov r0, r11
+; LE-I32-NEXT:    mov r1, r4
+; LE-I32-NEXT:    mov r2, r7
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    mov r0, r6
+; LE-I32-NEXT:    mov r1, r10
+; LE-I32-NEXT:    mov r2, r9
+; LE-I32-NEXT:    mov r3, r8
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r7, [sp, #200]
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    ldr r1, [sp, #204]
+; LE-I32-NEXT:    ldr r2, [sp, #208]
+; LE-I32-NEXT:    ldr r3, [sp, #212]
+; LE-I32-NEXT:    mov r0, r7
+; LE-I32-NEXT:    ldr r5, [sp, #172]
+; LE-I32-NEXT:    vmov.32 d14[1], r4
+; LE-I32-NEXT:    ldr r6, [sp, #176]
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEXT:    ldr r3, [sp, #180]
+; LE-I32-NEXT:    ldr r0, [sp, #168]
+; LE-I32-NEXT:    mov r1, r5
+; LE-I32-NEXT:    mov r2, r6
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #248
+; LE-I32-NEXT:    mov r5, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r4, [sp, #264]
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    ldr r1, [sp, #268]
+; LE-I32-NEXT:    ldr r2, [sp, #272]
+; LE-I32-NEXT:    vmov.32 d12[1], r5
+; LE-I32-NEXT:    ldr r3, [sp, #276]
+; LE-I32-NEXT:    mov r0, r4
+; LE-I32-NEXT:    ldr r6, [sp, #236]
+; LE-I32-NEXT:    ldr r7, [sp, #240]
+; LE-I32-NEXT:    ldr r8, [sp, #332]
+; LE-I32-NEXT:    ldr r5, [sp, #336]
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEXT:    ldr r3, [sp, #244]
+; LE-I32-NEXT:    ldr r0, [sp, #232]
+; LE-I32-NEXT:    mov r1, r6
+; LE-I32-NEXT:    mov r2, r7
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    ldr r3, [sp, #340]
+; LE-I32-NEXT:    ldr r0, [sp, #328]
+; LE-I32-NEXT:    mov r1, r8
+; LE-I32-NEXT:    mov r2, r5
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #312
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #300
+; LE-I32-NEXT:    ldr r7, [sp, #296]
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEXT:    mov r0, r7
+; LE-I32-NEXT:    vmov.32 d9[1], r4
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    vorr q0, q7, q7
+; LE-I32-NEXT:    vorr q1, q6, q6
+; LE-I32-NEXT:    vorr q2, q5, q5
+; LE-I32-NEXT:    vorr q3, q4, q4
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    add sp, sp, #4
+; LE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I64-LABEL: lrint_v16fp128:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    .pad #4
+; LE-I64-NEXT:    sub sp, sp, #4
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    .pad #72
+; LE-I64-NEXT:    sub sp, sp, #72
+; LE-I64-NEXT:    mov r6, r3
+; LE-I64-NEXT:    add r3, sp, #408
+; LE-I64-NEXT:    mov r7, r2
+; LE-I64-NEXT:    mov r4, r0
+; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r5, sp, #176
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    mov r0, r7
+; LE-I64-NEXT:    ldm r5, {r2, r3, r5}
+; LE-I64-NEXT:    mov r1, r6
+; LE-I64-NEXT:    ldr r8, [sp, #232]
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #188
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    mov r0, r5
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #236
+; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    mov r0, r8
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #252
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #248]
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #268
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #264]
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #284
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #280]
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #316
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #312]
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    vmov.32 d15[1], r5
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    ldr r5, [sp, #300]
+; LE-I64-NEXT:    vmov.32 d14[1], r7
+; LE-I64-NEXT:    ldr r2, [sp, #304]
+; LE-I64-NEXT:    ldr r3, [sp, #308]
+; LE-I64-NEXT:    vmov.32 d11[1], r6
+; LE-I64-NEXT:    ldr r6, [sp, #200]
+; LE-I64-NEXT:    ldr r7, [sp, #204]
+; LE-I64-NEXT:    vmov.32 d10[1], r8
+; LE-I64-NEXT:    ldr r8, [sp, #344]
+; LE-I64-NEXT:    vmov.32 d9[1], r11
+; LE-I64-NEXT:    ldr r11, [sp, #216]
+; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #296]
+; LE-I64-NEXT:    vmov.32 d8[1], r9
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vorr q5, q8, q8
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    vorr q4, q6, q6
+; LE-I64-NEXT:    vmov.32 d11[1], r1
+; LE-I64-NEXT:    mov r1, r5
+; LE-I64-NEXT:    vmov.32 d9[1], r10
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    ldr r2, [sp, #208]
+; LE-I64-NEXT:    ldr r3, [sp, #212]
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    mov r0, r6
+; LE-I64-NEXT:    mov r1, r7
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #220
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    mov r0, r11
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #348
+; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    mov r0, r8
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #364
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #360]
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #380
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #376]
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #396
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #392]
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #332
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #328]
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    add r0, r4, #64
+; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vmov.32 d13[1], r8
+; LE-I64-NEXT:    vmov.32 d18[1], r9
+; LE-I64-NEXT:    vmov.32 d15[1], r6
+; LE-I64-NEXT:    vmov.32 d12[1], r1
+; LE-I64-NEXT:    vmov.32 d14[1], r5
+; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEXT:    vmov.32 d8[1], r7
+; LE-I64-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d8, d9}, [r0:128]
+; LE-I64-NEXT:    vmov.32 d11[1], r11
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vmov.32 d10[1], r10
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEXT:    vst1.64 {d10, d11}, [r4:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r4:128]
+; LE-I64-NEXT:    add sp, sp, #72
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    add sp, sp, #4
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v16fp128:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEON-NEXT:    .pad #4
+; LE-I32-NEON-NEXT:    sub sp, sp, #4
+; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    mov r8, r3
+; LE-I32-NEON-NEXT:    add r3, sp, #280
+; LE-I32-NEON-NEXT:    mov r9, r2
+; LE-I32-NEON-NEXT:    mov r10, r1
+; LE-I32-NEON-NEXT:    mov r6, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r4, [sp, #216]
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #220]
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #224]
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #228]
+; LE-I32-NEON-NEXT:    mov r0, r4
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #152]
+; LE-I32-NEON-NEXT:    ldr r11, [sp, #104]
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #156
+; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT:    mov r0, r7
+; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #184]
+; LE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #188]
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #192]
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #196]
+; LE-I32-NEON-NEXT:    mov r0, r7
+; LE-I32-NEON-NEXT:    ldr r4, [sp, #120]
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #124
+; LE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEON-NEXT:    mov r0, r4
+; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r5, [sp, #136]
+; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #140]
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #144]
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #148]
+; LE-I32-NEON-NEXT:    mov r0, r5
+; LE-I32-NEON-NEXT:    ldr r4, [sp, #108]
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #112]
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #116]
+; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT:    mov r0, r11
+; LE-I32-NEON-NEXT:    mov r1, r4
+; LE-I32-NEON-NEXT:    mov r2, r7
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    mov r4, r0
+; LE-I32-NEON-NEXT:    mov r0, r6
+; LE-I32-NEON-NEXT:    mov r1, r10
+; LE-I32-NEON-NEXT:    mov r2, r9
+; LE-I32-NEON-NEXT:    mov r3, r8
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #200]
+; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #204]
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #208]
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #212]
+; LE-I32-NEON-NEXT:    mov r0, r7
+; LE-I32-NEON-NEXT:    ldr r5, [sp, #172]
+; LE-I32-NEON-NEXT:    vmov.32 d14[1], r4
+; LE-I32-NEON-NEXT:    ldr r6, [sp, #176]
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #180]
+; LE-I32-NEON-NEXT:    ldr r0, [sp, #168]
+; LE-I32-NEON-NEXT:    mov r1, r5
+; LE-I32-NEON-NEXT:    mov r2, r6
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #248
+; LE-I32-NEON-NEXT:    mov r5, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r4, [sp, #264]
+; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #268]
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #272]
+; LE-I32-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #276]
+; LE-I32-NEON-NEXT:    mov r0, r4
+; LE-I32-NEON-NEXT:    ldr r6, [sp, #236]
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #240]
+; LE-I32-NEON-NEXT:    ldr r8, [sp, #332]
+; LE-I32-NEON-NEXT:    ldr r5, [sp, #336]
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #244]
+; LE-I32-NEON-NEXT:    ldr r0, [sp, #232]
+; LE-I32-NEON-NEXT:    mov r1, r6
+; LE-I32-NEON-NEXT:    mov r2, r7
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #340]
+; LE-I32-NEON-NEXT:    ldr r0, [sp, #328]
+; LE-I32-NEON-NEXT:    mov r1, r8
+; LE-I32-NEON-NEXT:    mov r2, r5
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #312
+; LE-I32-NEON-NEXT:    mov r4, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #300
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #296]
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT:    mov r0, r7
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r4
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEON-NEXT:    vorr q0, q7, q7
+; LE-I32-NEON-NEXT:    vorr q1, q6, q6
+; LE-I32-NEON-NEXT:    vorr q2, q5, q5
+; LE-I32-NEON-NEXT:    vorr q3, q4, q4
+; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    add sp, sp, #4
+; LE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v16fp128:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT:    .pad #4
+; LE-I64-NEON-NEXT:    sub sp, sp, #4
+; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    .pad #72
+; LE-I64-NEON-NEXT:    sub sp, sp, #72
+; LE-I64-NEON-NEXT:    mov r6, r3
+; LE-I64-NEON-NEXT:    add r3, sp, #408
+; LE-I64-NEON-NEXT:    mov r7, r2
+; LE-I64-NEON-NEXT:    mov r4, r0
+; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r5, sp, #176
+; LE-I64-NEON-NEXT:    mov r10, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r7
+; LE-I64-NEON-NEXT:    ldm r5, {r2, r3, r5}
+; LE-I64-NEON-NEXT:    mov r1, r6
+; LE-I64-NEON-NEXT:    ldr r8, [sp, #232]
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #188
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r5
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #236
+; LE-I64-NEON-NEXT:    mov r11, r1
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r8
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #252
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #248]
+; LE-I64-NEON-NEXT:    mov r8, r1
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #268
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #264]
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #284
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #280]
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #316
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #312]
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r5
+; LE-I64-NEON-NEXT:    add lr, sp, #56
+; LE-I64-NEON-NEXT:    ldr r5, [sp, #300]
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r7
+; LE-I64-NEON-NEXT:    ldr r2, [sp, #304]
+; LE-I64-NEON-NEXT:    ldr r3, [sp, #308]
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r6
+; LE-I64-NEON-NEXT:    ldr r6, [sp, #200]
+; LE-I64-NEON-NEXT:    ldr r7, [sp, #204]
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r8
+; LE-I64-NEON-NEXT:    ldr r8, [sp, #344]
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r11
+; LE-I64-NEON-NEXT:    ldr r11, [sp, #216]
+; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #40
+; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #296]
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r9
+; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #24
+; LE-I64-NEON-NEXT:    vorr q5, q8, q8
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    vorr q4, q6, q6
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r1
+; LE-I64-NEON-NEXT:    mov r1, r5
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r10
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    ldr r2, [sp, #208]
+; LE-I64-NEON-NEXT:    ldr r3, [sp, #212]
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    mov r9, r1
+; LE-I64-NEON-NEXT:    mov r0, r6
+; LE-I64-NEON-NEXT:    mov r1, r7
+; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #220
+; LE-I64-NEON-NEXT:    mov r10, r1
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r11
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #348
+; LE-I64-NEON-NEXT:    mov r11, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r8
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #364
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #360]
+; LE-I64-NEON-NEXT:    mov r8, r1
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #380
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #376]
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #396
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #392]
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #332
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #328]
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    add r0, r4, #64
+; LE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #24
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r8
+; LE-I64-NEON-NEXT:    vmov.32 d18[1], r9
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r6
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r1
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r5
+; LE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r7
+; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r11
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #40
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r10
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d10, d11}, [r4:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #56
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]
+; LE-I64-NEON-NEXT:    add sp, sp, #72
+; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    add sp, sp, #4
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v16fp128:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEXT:    .pad #4
+; BE-I32-NEXT:    sub sp, sp, #4
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    .pad #16
+; BE-I32-NEXT:    sub sp, sp, #16
+; BE-I32-NEXT:    stm sp, {r0, r1, r2, r3} @ 16-byte Folded Spill
+; BE-I32-NEXT:    add r3, sp, #264
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #332
+; BE-I32-NEXT:    ldr r7, [sp, #328]
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    ldr r10, [sp, #280]
+; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEXT:    mov r0, r7
+; BE-I32-NEXT:    ldr r8, [sp, #168]
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r5, [sp, #344]
+; BE-I32-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEXT:    ldr r1, [sp, #348]
+; BE-I32-NEXT:    ldr r2, [sp, #352]
+; BE-I32-NEXT:    ldr r3, [sp, #356]
+; BE-I32-NEXT:    mov r0, r5
+; BE-I32-NEXT:    ldr r7, [sp, #284]
+; BE-I32-NEXT:    ldr r4, [sp, #288]
+; BE-I32-NEXT:    ldr r6, [sp, #172]
+; BE-I32-NEXT:    ldr r9, [sp, #176]
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r3, [sp, #292]
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    mov r0, r10
+; BE-I32-NEXT:    mov r1, r7
+; BE-I32-NEXT:    mov r2, r4
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r3, [sp, #180]
+; BE-I32-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEXT:    mov r0, r8
+; BE-I32-NEXT:    mov r1, r6
+; BE-I32-NEXT:    mov r2, r9
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #232
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #136
+; BE-I32-NEXT:    mov r6, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r5, [sp, #296]
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    ldr r1, [sp, #300]
+; BE-I32-NEXT:    ldr r2, [sp, #304]
+; BE-I32-NEXT:    ldr r3, [sp, #308]
+; BE-I32-NEXT:    mov r0, r5
+; BE-I32-NEXT:    ldr r10, [sp, #216]
+; BE-I32-NEXT:    ldr r8, [sp, #220]
+; BE-I32-NEXT:    ldr r9, [sp, #152]
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r7, [sp, #248]
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    ldr r1, [sp, #252]
+; BE-I32-NEXT:    ldr r2, [sp, #256]
+; BE-I32-NEXT:    vmov.32 d8[0], r6
+; BE-I32-NEXT:    ldr r3, [sp, #260]
+; BE-I32-NEXT:    mov r0, r7
+; BE-I32-NEXT:    ldr r5, [sp, #224]
+; BE-I32-NEXT:    ldr r11, [sp, #120]
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r3, [sp, #228]
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    mov r0, r10
+; BE-I32-NEXT:    mov r1, r8
+; BE-I32-NEXT:    mov r2, r5
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #200
+; BE-I32-NEXT:    mov r5, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEXT:    ldr r0, [sp, #184]
+; BE-I32-NEXT:    ldr r1, [sp, #188]
+; BE-I32-NEXT:    ldr r2, [sp, #192]
+; BE-I32-NEXT:    vmov.32 d14[0], r4
+; BE-I32-NEXT:    ldr r3, [sp, #196]
+; BE-I32-NEXT:    vmov.32 d15[1], r5
+; BE-I32-NEXT:    ldr r7, [sp, #156]
+; BE-I32-NEXT:    ldr r6, [sp, #160]
+; BE-I32-NEXT:    ldr r4, [sp, #124]
+; BE-I32-NEXT:    ldr r5, [sp, #128]
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r3, [sp, #164]
+; BE-I32-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEXT:    mov r0, r9
+; BE-I32-NEXT:    mov r1, r7
+; BE-I32-NEXT:    mov r2, r6
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r3, [sp, #132]
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    mov r0, r11
+; BE-I32-NEXT:    mov r1, r4
+; BE-I32-NEXT:    mov r2, r5
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    ldm sp, {r0, r1, r2, r3} @ 16-byte Folded Reload
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #316
+; BE-I32-NEXT:    ldr r7, [sp, #312]
+; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEXT:    mov r0, r7
+; BE-I32-NEXT:    vmov.32 d12[1], r4
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q6
+; BE-I32-NEXT:    vrev64.32 q1, q7
+; BE-I32-NEXT:    vrev64.32 q2, q4
+; BE-I32-NEXT:    vrev64.32 q3, q5
+; BE-I32-NEXT:    add sp, sp, #16
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    add sp, sp, #4
+; BE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I64-LABEL: lrint_v16fp128:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    .pad #4
+; BE-I64-NEXT:    sub sp, sp, #4
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    .pad #56
+; BE-I64-NEXT:    sub sp, sp, #56
+; BE-I64-NEXT:    mov r5, r3
+; BE-I64-NEXT:    add r3, sp, #376
+; BE-I64-NEXT:    mov r6, r2
+; BE-I64-NEXT:    mov r4, r0
+; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    ldr r7, [sp, #392]
+; BE-I64-NEXT:    add r3, sp, #396
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    mov r0, r7
+; BE-I64-NEXT:    ldr r11, [sp, #168]
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    ldr r2, [sp, #160]
+; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    ldr r3, [sp, #164]
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    mov r0, r6
+; BE-I64-NEXT:    mov r1, r5
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #172
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    mov r0, r11
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #220
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #216]
+; BE-I64-NEXT:    mov r11, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #236
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #232]
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #252
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #248]
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #268
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #264]
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #280]
+; BE-I64-NEXT:    ldr r2, [sp, #288]
+; BE-I64-NEXT:    vmov.32 d13[1], r7
+; BE-I64-NEXT:    ldr r7, [sp, #284]
+; BE-I64-NEXT:    ldr r3, [sp, #292]
+; BE-I64-NEXT:    vmov.32 d14[1], r5
+; BE-I64-NEXT:    ldr r5, [sp, #328]
+; BE-I64-NEXT:    vmov.32 d12[1], r6
+; BE-I64-NEXT:    ldr r6, [sp, #300]
+; BE-I64-NEXT:    vmov.32 d10[1], r8
+; BE-I64-NEXT:    ldr r8, [sp, #184]
+; BE-I64-NEXT:    vmov.32 d11[1], r11
+; BE-I64-NEXT:    vmov.32 d9[1], r10
+; BE-I64-NEXT:    vmov.32 d8[1], r9
+; BE-I64-NEXT:    vmov.32 d15[1], r1
+; BE-I64-NEXT:    mov r1, r7
+; BE-I64-NEXT:    vstr d14, [sp, #48] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d13, [sp, #40] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d12, [sp, #32] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d10, [sp, #16] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d9, [sp, #8] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d8, [sp] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    ldr r1, [sp, #296]
+; BE-I64-NEXT:    ldr r2, [sp, #304]
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    ldr r3, [sp, #308]
+; BE-I64-NEXT:    mov r0, r1
+; BE-I64-NEXT:    mov r1, r6
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #332
+; BE-I64-NEXT:    mov r11, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    mov r0, r5
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #188
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    mov r0, r8
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #204
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #200]
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #348
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #344]
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #364
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #360]
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #316
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #312]
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    vldr d18, [sp, #48] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d17, d15
+; BE-I64-NEXT:    vrev64.32 d16, d18
+; BE-I64-NEXT:    vldr d18, [sp, #40] @ 8-byte Reload
+; BE-I64-NEXT:    vmov.32 d24[0], r0
+; BE-I64-NEXT:    add r0, r4, #64
+; BE-I64-NEXT:    vldr d20, [sp, #32] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d19, d18
+; BE-I64-NEXT:    vmov.32 d9[1], r11
+; BE-I64-NEXT:    vmov.32 d10[1], r7
+; BE-I64-NEXT:    vrev64.32 d18, d20
+; BE-I64-NEXT:    vldr d20, [sp, #24] @ 8-byte Reload
+; BE-I64-NEXT:    vmov.32 d8[1], r10
+; BE-I64-NEXT:    vmov.32 d14[1], r6
+; BE-I64-NEXT:    vmov.32 d24[1], r1
+; BE-I64-NEXT:    vldr d22, [sp, #16] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d21, d20
+; BE-I64-NEXT:    vrev64.32 d1, d9
+; BE-I64-NEXT:    vmov.32 d13[1], r9
+; BE-I64-NEXT:    vrev64.32 d31, d10
+; BE-I64-NEXT:    vrev64.32 d20, d22
+; BE-I64-NEXT:    vldr d22, [sp, #8] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d0, d8
+; BE-I64-NEXT:    vrev64.32 d29, d14
+; BE-I64-NEXT:    vmov.32 d12[1], r5
+; BE-I64-NEXT:    vrev64.32 d30, d24
+; BE-I64-NEXT:    vrev64.32 d27, d22
+; BE-I64-NEXT:    vldr d22, [sp] @ 8-byte Reload
+; BE-I64-NEXT:    vst1.64 {d0, d1}, [r0:128]!
+; BE-I64-NEXT:    vmov.32 d11[1], r8
+; BE-I64-NEXT:    vrev64.32 d28, d13
+; BE-I64-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 d26, d22
+; BE-I64-NEXT:    vrev64.32 d23, d12
+; BE-I64-NEXT:    vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 d22, d11
+; BE-I64-NEXT:    vst1.64 {d26, d27}, [r0:128]
+; BE-I64-NEXT:    vst1.64 {d20, d21}, [r4:128]!
+; BE-I64-NEXT:    vst1.64 {d22, d23}, [r4:128]!
+; BE-I64-NEXT:    vst1.64 {d18, d19}, [r4:128]!
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r4:128]
+; BE-I64-NEXT:    add sp, sp, #56
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    add sp, sp, #4
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v16fp128:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEON-NEXT:    .pad #4
+; BE-I32-NEON-NEXT:    sub sp, sp, #4
+; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    .pad #16
+; BE-I32-NEON-NEXT:    sub sp, sp, #16
+; BE-I32-NEON-NEXT:    stm sp, {r0, r1, r2, r3} @ 16-byte Folded Spill
+; BE-I32-NEON-NEXT:    add r3, sp, #264
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #332
+; BE-I32-NEON-NEXT:    ldr r7, [sp, #328]
+; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEON-NEXT:    ldr r10, [sp, #280]
+; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT:    mov r0, r7
+; BE-I32-NEON-NEXT:    ldr r8, [sp, #168]
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r5, [sp, #344]
+; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #348]
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #352]
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #356]
+; BE-I32-NEON-NEXT:    mov r0, r5
+; BE-I32-NEON-NEXT:    ldr r7, [sp, #284]
+; BE-I32-NEON-NEXT:    ldr r4, [sp, #288]
+; BE-I32-NEON-NEXT:    ldr r6, [sp, #172]
+; BE-I32-NEON-NEXT:    ldr r9, [sp, #176]
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #292]
+; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT:    mov r0, r10
+; BE-I32-NEON-NEXT:    mov r1, r7
+; BE-I32-NEON-NEXT:    mov r2, r4
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #180]
+; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEON-NEXT:    mov r0, r8
+; BE-I32-NEON-NEXT:    mov r1, r6
+; BE-I32-NEON-NEXT:    mov r2, r9
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #232
+; BE-I32-NEON-NEXT:    mov r4, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #136
+; BE-I32-NEON-NEXT:    mov r6, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r5, [sp, #296]
+; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #300]
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #304]
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #308]
+; BE-I32-NEON-NEXT:    mov r0, r5
+; BE-I32-NEON-NEXT:    ldr r10, [sp, #216]
+; BE-I32-NEON-NEXT:    ldr r8, [sp, #220]
+; BE-I32-NEON-NEXT:    ldr r9, [sp, #152]
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r7, [sp, #248]
+; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #252]
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #256]
+; BE-I32-NEON-NEXT:    vmov.32 d8[0], r6
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #260]
+; BE-I32-NEON-NEXT:    mov r0, r7
+; BE-I32-NEON-NEXT:    ldr r5, [sp, #224]
+; BE-I32-NEON-NEXT:    ldr r11, [sp, #120]
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #228]
+; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT:    mov r0, r10
+; BE-I32-NEON-NEXT:    mov r1, r8
+; BE-I32-NEON-NEXT:    mov r2, r5
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #200
+; BE-I32-NEON-NEXT:    mov r5, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT:    ldr r0, [sp, #184]
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #188]
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #192]
+; BE-I32-NEON-NEXT:    vmov.32 d14[0], r4
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #196]
+; BE-I32-NEON-NEXT:    vmov.32 d15[1], r5
+; BE-I32-NEON-NEXT:    ldr r7, [sp, #156]
+; BE-I32-NEON-NEXT:    ldr r6, [sp, #160]
+; BE-I32-NEON-NEXT:    ldr r4, [sp, #124]
+; BE-I32-NEON-NEXT:    ldr r5, [sp, #128]
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #164]
+; BE-I32-NEON-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEON-NEXT:    mov r0, r9
+; BE-I32-NEON-NEXT:    mov r1, r7
+; BE-I32-NEON-NEXT:    mov r2, r6
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #132]
+; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT:    mov r0, r11
+; BE-I32-NEON-NEXT:    mov r1, r4
+; BE-I32-NEON-NEXT:    mov r2, r5
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    mov r4, r0
+; BE-I32-NEON-NEXT:    ldm sp, {r0, r1, r2, r3} @ 16-byte Folded Reload
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #316
+; BE-I32-NEON-NEXT:    ldr r7, [sp, #312]
+; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT:    mov r0, r7
+; BE-I32-NEON-NEXT:    vmov.32 d12[1], r4
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT:    vrev64.32 q0, q6
+; BE-I32-NEON-NEXT:    vrev64.32 q1, q7
+; BE-I32-NEON-NEXT:    vrev64.32 q2, q4
+; BE-I32-NEON-NEXT:    vrev64.32 q3, q5
+; BE-I32-NEON-NEXT:    add sp, sp, #16
+; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    add sp, sp, #4
+; BE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v16fp128:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT:    .pad #4
+; BE-I64-NEON-NEXT:    sub sp, sp, #4
+; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    .pad #56
+; BE-I64-NEON-NEXT:    sub sp, sp, #56
+; BE-I64-NEON-NEXT:    mov r5, r3
+; BE-I64-NEON-NEXT:    add r3, sp, #376
+; BE-I64-NEON-NEXT:    mov r6, r2
+; BE-I64-NEON-NEXT:    mov r4, r0
+; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    ldr r7, [sp, #392]
+; BE-I64-NEON-NEXT:    add r3, sp, #396
+; BE-I64-NEON-NEXT:    mov r9, r1
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    mov r0, r7
+; BE-I64-NEON-NEXT:    ldr r11, [sp, #168]
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    ldr r2, [sp, #160]
+; BE-I64-NEON-NEXT:    mov r10, r1
+; BE-I64-NEON-NEXT:    ldr r3, [sp, #164]
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r6
+; BE-I64-NEON-NEXT:    mov r1, r5
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #172
+; BE-I64-NEON-NEXT:    mov r8, r1
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r11
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #220
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #216]
+; BE-I64-NEON-NEXT:    mov r11, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #236
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #232]
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #252
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #248]
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #268
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #264]
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #280]
+; BE-I64-NEON-NEXT:    ldr r2, [sp, #288]
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r7
+; BE-I64-NEON-NEXT:    ldr r7, [sp, #284]
+; BE-I64-NEON-NEXT:    ldr r3, [sp, #292]
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r5
+; BE-I64-NEON-NEXT:    ldr r5, [sp, #328]
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r6
+; BE-I64-NEON-NEXT:    ldr r6, [sp, #300]
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r8
+; BE-I64-NEON-NEXT:    ldr r8, [sp, #184]
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r11
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r10
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r9
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r1
+; BE-I64-NEON-NEXT:    mov r1, r7
+; BE-I64-NEON-NEXT:    vstr d14, [sp, #48] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d13, [sp, #40] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d12, [sp, #32] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d10, [sp, #16] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d9, [sp, #8] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d8, [sp] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    mov r10, r1
+; BE-I64-NEON-NEXT:    ldr r1, [sp, #296]
+; BE-I64-NEON-NEXT:    ldr r2, [sp, #304]
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    ldr r3, [sp, #308]
+; BE-I64-NEON-NEXT:    mov r0, r1
+; BE-I64-NEON-NEXT:    mov r1, r6
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #332
+; BE-I64-NEON-NEXT:    mov r11, r1
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r5
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #188
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r8
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #204
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #200]
+; BE-I64-NEON-NEXT:    mov r8, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #348
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #344]
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #364
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #360]
+; BE-I64-NEON-NEXT:    mov r9, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #316
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #312]
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    vldr d18, [sp, #48] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d17, d15
+; BE-I64-NEON-NEXT:    vrev64.32 d16, d18
+; BE-I64-NEON-NEXT:    vldr d18, [sp, #40] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d24[0], r0
+; BE-I64-NEON-NEXT:    add r0, r4, #64
+; BE-I64-NEON-NEXT:    vldr d20, [sp, #32] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d19, d18
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r11
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r7
+; BE-I64-NEON-NEXT:    vrev64.32 d18, d20
+; BE-I64-NEON-NEXT:    vldr d20, [sp, #24] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r10
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r6
+; BE-I64-NEON-NEXT:    vmov.32 d24[1], r1
+; BE-I64-NEON-NEXT:    vldr d22, [sp, #16] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d21, d20
+; BE-I64-NEON-NEXT:    vrev64.32 d1, d9
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r9
+; BE-I64-NEON-NEXT:    vrev64.32 d31, d10
+; BE-I64-NEON-NEXT:    vrev64.32 d20, d22
+; BE-I64-NEON-NEXT:    vldr d22, [sp, #8] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d0, d8
+; BE-I64-NEON-NEXT:    vrev64.32 d29, d14
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r5
+; BE-I64-NEON-NEXT:    vrev64.32 d30, d24
+; BE-I64-NEON-NEXT:    vrev64.32 d27, d22
+; BE-I64-NEON-NEXT:    vldr d22, [sp] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vst1.64 {d0, d1}, [r0:128]!
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r8
+; BE-I64-NEON-NEXT:    vrev64.32 d28, d13
+; BE-I64-NEON-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEON-NEXT:    vrev64.32 d26, d22
+; BE-I64-NEON-NEXT:    vrev64.32 d23, d12
+; BE-I64-NEON-NEXT:    vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEON-NEXT:    vrev64.32 d22, d11
+; BE-I64-NEON-NEXT:    vst1.64 {d26, d27}, [r0:128]
+; BE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r4:128]!
+; BE-I64-NEON-NEXT:    vst1.64 {d22, d23}, [r4:128]!
+; BE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r4:128]!
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]
+; BE-I64-NEON-NEXT:    add sp, sp, #56
+; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    add sp, sp, #4
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128> %x)
+  ret <16 x iXLen> %a
+}
+declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>)
+
+define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) {
+; LE-I32-LABEL: lrint_v32fp128:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEXT:    .pad #4
+; LE-I32-NEXT:    sub sp, sp, #4
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    .pad #80
+; LE-I32-NEXT:    sub sp, sp, #80
+; LE-I32-NEXT:    str r3, [sp, #16] @ 4-byte Spill
+; LE-I32-NEXT:    add r3, sp, #336
+; LE-I32-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; LE-I32-NEXT:    mov r9, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #244
+; LE-I32-NEXT:    ldr r7, [sp, #240]
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    ldr r5, [sp, #288]
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEXT:    mov r0, r7
+; LE-I32-NEXT:    ldr r8, [sp, #352]
+; LE-I32-NEXT:    ldr r11, [sp, #656]
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #292
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    mov r0, r5
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #272
+; LE-I32-NEXT:    mov r10, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r6, [sp, #256]
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    ldr r1, [sp, #260]
+; LE-I32-NEXT:    ldr r2, [sp, #264]
+; LE-I32-NEXT:    ldr r3, [sp, #268]
+; LE-I32-NEXT:    mov r0, r6
+; LE-I32-NEXT:    ldr r7, [sp, #660]
+; LE-I32-NEXT:    vmov.32 d11[1], r10
+; LE-I32-NEXT:    ldr r5, [sp, #664]
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    ldr r1, [sp, #356]
+; LE-I32-NEXT:    ldr r2, [sp, #360]
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    ldr r3, [sp, #364]
+; LE-I32-NEXT:    mov r0, r8
+; LE-I32-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r3, [sp, #668]
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEXT:    mov r0, r11
+; LE-I32-NEXT:    mov r1, r7
+; LE-I32-NEXT:    mov r2, r5
+; LE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #400
+; LE-I32-NEXT:    mov r8, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #592
+; LE-I32-NEXT:    mov r6, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r4, [sp, #416]
+; LE-I32-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEXT:    ldr r1, [sp, #420]
+; LE-I32-NEXT:    ldr r2, [sp, #424]
+; LE-I32-NEXT:    vmov.32 d13[0], r6
+; LE-I32-NEXT:    ldr r3, [sp, #428]
+; LE-I32-NEXT:    mov r0, r4
+; LE-I32-NEXT:    ldr r7, [sp, #224]
+; LE-I32-NEXT:    ldr r10, [sp, #228]
+; LE-I32-NEXT:    ldr r5, [sp, #232]
+; LE-I32-NEXT:    ldr r11, [sp, #464]
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r3, [sp, #236]
+; LE-I32-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEXT:    mov r0, r7
+; LE-I32-NEXT:    mov r1, r10
+; LE-I32-NEXT:    mov r2, r5
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #208
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    ldr r0, [sp, #672]
+; LE-I32-NEXT:    ldr r1, [sp, #676]
+; LE-I32-NEXT:    ldr r2, [sp, #680]
+; LE-I32-NEXT:    vmov.32 d11[0], r8
+; LE-I32-NEXT:    ldr r3, [sp, #684]
+; LE-I32-NEXT:    vmov.32 d9[1], r4
+; LE-I32-NEXT:    ldr r7, [sp, #612]
+; LE-I32-NEXT:    ldr r6, [sp, #616]
+; LE-I32-NEXT:    ldr r5, [sp, #468]
+; LE-I32-NEXT:    ldr r4, [sp, #472]
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEXT:    ldr r3, [sp, #620]
+; LE-I32-NEXT:    ldr r0, [sp, #608]
+; LE-I32-NEXT:    mov r1, r7
+; LE-I32-NEXT:    mov r2, r6
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r3, [sp, #476]
+; LE-I32-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEXT:    mov r0, r11
+; LE-I32-NEXT:    mov r1, r5
+; LE-I32-NEXT:    mov r2, r4
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #560
+; LE-I32-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #644
+; LE-I32-NEXT:    ldr r7, [sp, #640]
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEXT:    mov r0, r7
+; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #624
+; LE-I32-NEXT:    mov r11, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #196
+; LE-I32-NEXT:    ldr r7, [sp, #192]
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEXT:    mov r0, r7
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    mov r6, r0
+; LE-I32-NEXT:    ldr r2, [sp, #184]
+; LE-I32-NEXT:    ldr r3, [sp, #188]
+; LE-I32-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; LE-I32-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #324
+; LE-I32-NEXT:    ldr r7, [sp, #320]
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEXT:    mov r0, r7
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #304
+; LE-I32-NEXT:    mov r7, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    ldr r4, [sp, #368]
+; LE-I32-NEXT:    ldr r1, [sp, #372]
+; LE-I32-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I32-NEXT:    ldr r2, [sp, #376]
+; LE-I32-NEXT:    ldr r3, [sp, #380]
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    mov r0, r4
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r5, [sp, #384]
+; LE-I32-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEXT:    ldr r1, [sp, #388]
+; LE-I32-NEXT:    ldr r2, [sp, #392]
+; LE-I32-NEXT:    ldr r3, [sp, #396]
+; LE-I32-NEXT:    mov r0, r5
+; LE-I32-NEXT:    ldr r4, [sp, #432]
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEXT:    ldr r1, [sp, #436]
+; LE-I32-NEXT:    ldr r2, [sp, #440]
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    ldr r3, [sp, #444]
+; LE-I32-NEXT:    mov r0, r4
+; LE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEXT:    ldr r0, [sp, #576]
+; LE-I32-NEXT:    ldr r1, [sp, #580]
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    vmov.32 d14[1], r7
+; LE-I32-NEXT:    ldr r2, [sp, #584]
+; LE-I32-NEXT:    ldr r3, [sp, #588]
+; LE-I32-NEXT:    vmov.32 d10[1], r11
+; LE-I32-NEXT:    ldr r8, [sp, #448]
+; LE-I32-NEXT:    ldr r4, [sp, #544]
+; LE-I32-NEXT:    ldr r10, [sp, #548]
+; LE-I32-NEXT:    vmov.32 d8[1], r6
+; LE-I32-NEXT:    ldr r7, [sp, #552]
+; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT:    ldr r11, [sp, #512]
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    ldr r3, [sp, #556]
+; LE-I32-NEXT:    mov r1, r10
+; LE-I32-NEXT:    mov r2, r7
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vmov.32 d16[1], r0
+; LE-I32-NEXT:    mov r0, r4
+; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #528
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; LE-I32-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEXT:    ldr r0, [sp, #480]
+; LE-I32-NEXT:    ldr r2, [sp, #488]
+; LE-I32-NEXT:    vmov.32 d13[0], r1
+; LE-I32-NEXT:    ldr r1, [sp, #484]
+; LE-I32-NEXT:    ldr r3, [sp, #492]
+; LE-I32-NEXT:    vmov.32 d15[1], r4
+; LE-I32-NEXT:    ldr r7, [sp, #452]
+; LE-I32-NEXT:    ldr r5, [sp, #456]
+; LE-I32-NEXT:    ldr r6, [sp, #516]
+; LE-I32-NEXT:    ldr r4, [sp, #520]
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r3, [sp, #460]
+; LE-I32-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEXT:    mov r0, r8
+; LE-I32-NEXT:    mov r1, r7
+; LE-I32-NEXT:    mov r2, r5
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r3, [sp, #524]
+; LE-I32-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEXT:    mov r0, r11
+; LE-I32-NEXT:    mov r1, r6
+; LE-I32-NEXT:    mov r2, r4
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    add r3, sp, #496
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    add r0, r9, #64
+; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    vst1.32 {d12, d13}, [r0:128]!
+; LE-I32-NEXT:    vmov.32 d14[1], r4
+; LE-I32-NEXT:    vst1.32 {d14, d15}, [r0:128]!
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT:    vst1.64 {d10, d11}, [r0:128]
+; LE-I32-NEXT:    vst1.32 {d8, d9}, [r9:128]!
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    vst1.32 {d16, d17}, [r9:128]!
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    vst1.32 {d16, d17}, [r9:128]!
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vst1.64 {d16, d17}, [r9:128]
+; LE-I32-NEXT:    add sp, sp, #80
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    add sp, sp, #4
+; LE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I64-LABEL: lrint_v32fp128:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    .pad #4
+; LE-I64-NEXT:    sub sp, sp, #4
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    .pad #192
+; LE-I64-NEXT:    sub sp, sp, #192
+; LE-I64-NEXT:    str r3, [sp, #60] @ 4-byte Spill
+; LE-I64-NEXT:    add r3, sp, #688
+; LE-I64-NEXT:    str r2, [sp, #56] @ 4-byte Spill
+; LE-I64-NEXT:    mov r9, r0
+; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #560
+; LE-I64-NEXT:    mov r4, r0
+; LE-I64-NEXT:    str r1, [sp, #64] @ 4-byte Spill
+; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEXT:    ldr r7, [sp, #544]
+; LE-I64-NEXT:    ldr r6, [sp, #548]
+; LE-I64-NEXT:    add lr, sp, #96
+; LE-I64-NEXT:    ldr r2, [sp, #552]
+; LE-I64-NEXT:    vmov.32 d17[1], r1
+; LE-I64-NEXT:    ldr r3, [sp, #556]
+; LE-I64-NEXT:    mov r0, r7
+; LE-I64-NEXT:    mov r1, r6
+; LE-I64-NEXT:    vorr q4, q8, q8
+; LE-I64-NEXT:    ldr r5, [sp, #528]
+; LE-I64-NEXT:    vmov.32 d17[0], r4
+; LE-I64-NEXT:    ldr r10, [sp, #304]
+; LE-I64-NEXT:    ldr r8, [sp, #368]
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #532
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    add lr, sp, #144
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    mov r0, r5
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #308
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    add lr, sp, #176
+; LE-I64-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    mov r0, r10
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #372
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    mov r0, r8
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #404
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #400]
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #596
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #592]
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #676
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #672]
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add lr, sp, #96
+; LE-I64-NEXT:    vmov.32 d13[1], r4
+; LE-I64-NEXT:    str r1, [sp, #52] @ 4-byte Spill
+; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #80
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #128
+; LE-I64-NEXT:    vmov.32 d9[1], r7
+; LE-I64-NEXT:    ldr r1, [sp, #628]
+; LE-I64-NEXT:    ldr r2, [sp, #632]
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #112
+; LE-I64-NEXT:    vmov.32 d15[1], r6
+; LE-I64-NEXT:    ldr r3, [sp, #636]
+; LE-I64-NEXT:    ldr r7, [sp, #64] @ 4-byte Reload
+; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    vmov.32 d11[1], r10
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #176
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #144
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d18[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #624]
+; LE-I64-NEXT:    vmov.32 d16[1], r11
+; LE-I64-NEXT:    vmov.32 d9[1], r5
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #96
+; LE-I64-NEXT:    vmov.32 d19[1], r7
+; LE-I64-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #512
+; LE-I64-NEXT:    str r0, [sp, #48] @ 4-byte Spill
+; LE-I64-NEXT:    str r1, [sp, #64] @ 4-byte Spill
+; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #768
+; LE-I64-NEXT:    mov r11, r0
+; LE-I64-NEXT:    str r1, [sp, #28] @ 4-byte Spill
+; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    ldr r6, [sp, #784]
+; LE-I64-NEXT:    add r3, sp, #788
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    mov r0, r6
+; LE-I64-NEXT:    ldr r5, [sp, #736]
+; LE-I64-NEXT:    ldr r7, [sp, #752]
+; LE-I64-NEXT:    ldr r4, [sp, #720]
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #740
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    mov r0, r5
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #756
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    mov r0, r7
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #724
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    mov r0, r4
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    vmov.32 d13[1], r7
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    ldr r2, [sp, #296]
+; LE-I64-NEXT:    vmov.32 d12[1], r5
+; LE-I64-NEXT:    ldr r3, [sp, #300]
+; LE-I64-NEXT:    ldr r4, [sp, #576]
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
+; LE-I64-NEXT:    ldr r10, [sp, #384]
+; LE-I64-NEXT:    vmov.32 d15[1], r6
+; LE-I64-NEXT:    ldr r6, [sp, #352]
+; LE-I64-NEXT:    vmov.32 d14[1], r8
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #32
+; LE-I64-NEXT:    vmov.32 d11[1], r1
+; LE-I64-NEXT:    ldr r1, [sp, #60] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d8[0], r11
+; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #176
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    add r3, sp, #356
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    vmov.32 d16[0], r0
+; LE-I64-NEXT:    mov r0, r6
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add lr, sp, #112
+; LE-I64-NEXT:    add r3, sp, #388
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    mov r0, r10
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add lr, sp, #128
+; LE-I64-NEXT:    add r3, sp, #580
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    mov r0, r4
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add lr, sp, #80
+; LE-I64-NEXT:    add r3, sp, #708
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #704]
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    vmov.32 d8[1], r4
+; LE-I64-NEXT:    add lr, sp, #80
+; LE-I64-NEXT:    ldr r2, [sp, #52] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d12[1], r6
+; LE-I64-NEXT:    ldr r6, [sp, #644]
+; LE-I64-NEXT:    ldr r3, [sp, #652]
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #128
+; LE-I64-NEXT:    vmov.32 d14[1], r7
+; LE-I64-NEXT:    ldr r4, [sp, #480]
+; LE-I64-NEXT:    ldr r7, [sp, #656]
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #112
+; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
+; LE-I64-NEXT:    ldr r10, [sp, #496]
+; LE-I64-NEXT:    vmov.32 d16[1], r5
+; LE-I64-NEXT:    add r5, r9, #192
+; LE-I64-NEXT:    ldr r8, [sp, #608]
+; LE-I64-NEXT:    vmov.32 d10[1], r1
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #176
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d16[1], r0
+; LE-I64-NEXT:    ldr r0, [sp, #640]
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #96
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    vmov.32 d16[1], r2
+; LE-I64-NEXT:    ldr r2, [sp, #648]
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; LE-I64-NEXT:    vst1.64 {d10, d11}, [r5:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; LE-I64-NEXT:    ldr r1, [sp, #48] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d9[0], r1
+; LE-I64-NEXT:    mov r1, r6
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #660
+; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    mov r0, r7
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #484
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    mov r0, r4
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #500
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    mov r0, r10
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #612
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    mov r0, r8
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #64] @ 4-byte Reload
+; LE-I64-NEXT:    add lr, sp, #96
+; LE-I64-NEXT:    add r8, r9, #128
+; LE-I64-NEXT:    vmov.32 d13[1], r7
+; LE-I64-NEXT:    ldr r2, [sp, #344]
+; LE-I64-NEXT:    ldr r3, [sp, #348]
+; LE-I64-NEXT:    vmov.32 d12[1], r11
+; LE-I64-NEXT:    ldr r7, [sp, #452]
+; LE-I64-NEXT:    ldr r10, [sp, #416]
+; LE-I64-NEXT:    vmov.32 d9[1], r0
+; LE-I64-NEXT:    ldr r0, [sp, #336]
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #64
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #32
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #144
+; LE-I64-NEXT:    vmov.32 d11[1], r4
+; LE-I64-NEXT:    ldr r4, [sp, #340]
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]
+; LE-I64-NEXT:    mov r1, r4
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #80
+; LE-I64-NEXT:    vmov.32 d10[1], r6
+; LE-I64-NEXT:    ldr r6, [sp, #448]
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    ldr r2, [sp, #456]
+; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    ldr r3, [sp, #460]
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    mov r0, r6
+; LE-I64-NEXT:    mov r1, r7
+; LE-I64-NEXT:    ldr r5, [sp, #432]
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #468
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #464]
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #420
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    mov r0, r10
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #436
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    mov r0, r5
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #324
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #320]
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add lr, sp, #64
+; LE-I64-NEXT:    vmov.32 d9[1], r5
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #96
+; LE-I64-NEXT:    vmov.32 d13[1], r7
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #176
+; LE-I64-NEXT:    vmov.32 d8[1], r4
+; LE-I64-NEXT:    vmov.32 d12[1], r6
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    add r0, r9, #64
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r8:128]
+; LE-I64-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    vmov.32 d15[1], r11
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #112
+; LE-I64-NEXT:    vmov.32 d14[1], r1
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r9:128]!
+; LE-I64-NEXT:    vst1.64 {d14, d15}, [r9:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #128
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r9:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r9:128]
+; LE-I64-NEXT:    add sp, sp, #192
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    add sp, sp, #4
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I32-NEON-LABEL: lrint_v32fp128:
+; LE-I32-NEON:       @ %bb.0:
+; LE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I32-NEON-NEXT:    .pad #4
+; LE-I32-NEON-NEXT:    sub sp, sp, #4
+; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    .pad #80
+; LE-I32-NEON-NEXT:    sub sp, sp, #80
+; LE-I32-NEON-NEXT:    str r3, [sp, #16] @ 4-byte Spill
+; LE-I32-NEON-NEXT:    add r3, sp, #336
+; LE-I32-NEON-NEXT:    str r2, [sp, #12] @ 4-byte Spill
+; LE-I32-NEON-NEXT:    mov r9, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #244
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #240]
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    ldr r5, [sp, #288]
+; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT:    mov r0, r7
+; LE-I32-NEON-NEXT:    ldr r8, [sp, #352]
+; LE-I32-NEON-NEXT:    ldr r11, [sp, #656]
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #292
+; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT:    mov r0, r5
+; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #272
+; LE-I32-NEON-NEXT:    mov r10, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r6, [sp, #256]
+; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #260]
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #264]
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #268]
+; LE-I32-NEON-NEXT:    mov r0, r6
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #660]
+; LE-I32-NEON-NEXT:    vmov.32 d11[1], r10
+; LE-I32-NEON-NEXT:    ldr r5, [sp, #664]
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #356]
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #360]
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #364]
+; LE-I32-NEON-NEXT:    mov r0, r8
+; LE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #668]
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEON-NEXT:    mov r0, r11
+; LE-I32-NEON-NEXT:    mov r1, r7
+; LE-I32-NEON-NEXT:    mov r2, r5
+; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #400
+; LE-I32-NEON-NEXT:    mov r8, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #592
+; LE-I32-NEON-NEXT:    mov r6, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r4, [sp, #416]
+; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #420]
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #424]
+; LE-I32-NEON-NEXT:    vmov.32 d13[0], r6
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #428]
+; LE-I32-NEON-NEXT:    mov r0, r4
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #224]
+; LE-I32-NEON-NEXT:    ldr r10, [sp, #228]
+; LE-I32-NEON-NEXT:    ldr r5, [sp, #232]
+; LE-I32-NEON-NEXT:    ldr r11, [sp, #464]
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #236]
+; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT:    mov r0, r7
+; LE-I32-NEON-NEXT:    mov r1, r10
+; LE-I32-NEON-NEXT:    mov r2, r5
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #208
+; LE-I32-NEON-NEXT:    mov r4, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEON-NEXT:    ldr r0, [sp, #672]
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #676]
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #680]
+; LE-I32-NEON-NEXT:    vmov.32 d11[0], r8
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #684]
+; LE-I32-NEON-NEXT:    vmov.32 d9[1], r4
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #612]
+; LE-I32-NEON-NEXT:    ldr r6, [sp, #616]
+; LE-I32-NEON-NEXT:    ldr r5, [sp, #468]
+; LE-I32-NEON-NEXT:    ldr r4, [sp, #472]
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #620]
+; LE-I32-NEON-NEXT:    ldr r0, [sp, #608]
+; LE-I32-NEON-NEXT:    mov r1, r7
+; LE-I32-NEON-NEXT:    mov r2, r6
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #476]
+; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEON-NEXT:    mov r0, r11
+; LE-I32-NEON-NEXT:    mov r1, r5
+; LE-I32-NEON-NEXT:    mov r2, r4
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #560
+; LE-I32-NEON-NEXT:    str r0, [sp, #8] @ 4-byte Spill
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #644
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #640]
+; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT:    mov r0, r7
+; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #624
+; LE-I32-NEON-NEXT:    mov r11, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #196
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #192]
+; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT:    mov r0, r7
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    mov r6, r0
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #184]
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #188]
+; LE-I32-NEON-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #324
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #320]
+; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEON-NEXT:    mov r0, r7
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #304
+; LE-I32-NEON-NEXT:    mov r7, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    ldr r4, [sp, #368]
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #372]
+; LE-I32-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #376]
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #380]
+; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT:    mov r0, r4
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r5, [sp, #384]
+; LE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #388]
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #392]
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #396]
+; LE-I32-NEON-NEXT:    mov r0, r5
+; LE-I32-NEON-NEXT:    ldr r4, [sp, #432]
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #436]
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #440]
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #444]
+; LE-I32-NEON-NEXT:    mov r0, r4
+; LE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEON-NEXT:    ldr r0, [sp, #576]
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #580]
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    vmov.32 d14[1], r7
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #584]
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #588]
+; LE-I32-NEON-NEXT:    vmov.32 d10[1], r11
+; LE-I32-NEON-NEXT:    ldr r8, [sp, #448]
+; LE-I32-NEON-NEXT:    ldr r4, [sp, #544]
+; LE-I32-NEON-NEXT:    ldr r10, [sp, #548]
+; LE-I32-NEON-NEXT:    vmov.32 d8[1], r6
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #552]
+; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    ldr r11, [sp, #512]
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #556]
+; LE-I32-NEON-NEXT:    mov r1, r10
+; LE-I32-NEON-NEXT:    mov r2, r7
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vmov.32 d16[1], r0
+; LE-I32-NEON-NEXT:    mov r0, r4
+; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #528
+; LE-I32-NEON-NEXT:    mov r4, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
+; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEON-NEXT:    ldr r0, [sp, #480]
+; LE-I32-NEON-NEXT:    ldr r2, [sp, #488]
+; LE-I32-NEON-NEXT:    vmov.32 d13[0], r1
+; LE-I32-NEON-NEXT:    ldr r1, [sp, #484]
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #492]
+; LE-I32-NEON-NEXT:    vmov.32 d15[1], r4
+; LE-I32-NEON-NEXT:    ldr r7, [sp, #452]
+; LE-I32-NEON-NEXT:    ldr r5, [sp, #456]
+; LE-I32-NEON-NEXT:    ldr r6, [sp, #516]
+; LE-I32-NEON-NEXT:    ldr r4, [sp, #520]
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #460]
+; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEON-NEXT:    mov r0, r8
+; LE-I32-NEON-NEXT:    mov r1, r7
+; LE-I32-NEON-NEXT:    mov r2, r5
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    ldr r3, [sp, #524]
+; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEON-NEXT:    mov r0, r11
+; LE-I32-NEON-NEXT:    mov r1, r6
+; LE-I32-NEON-NEXT:    mov r2, r4
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    add r3, sp, #496
+; LE-I32-NEON-NEXT:    mov r4, r0
+; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEON-NEXT:    bl lrintl
+; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEON-NEXT:    add r0, r9, #64
+; LE-I32-NEON-NEXT:    add lr, sp, #64
+; LE-I32-NEON-NEXT:    vst1.32 {d12, d13}, [r0:128]!
+; LE-I32-NEON-NEXT:    vmov.32 d14[1], r4
+; LE-I32-NEON-NEXT:    vst1.32 {d14, d15}, [r0:128]!
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    add lr, sp, #32
+; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]
+; LE-I32-NEON-NEXT:    vst1.32 {d8, d9}, [r9:128]!
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    add lr, sp, #48
+; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r9:128]!
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    add lr, sp, #16
+; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r9:128]!
+; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]
+; LE-I32-NEON-NEXT:    add sp, sp, #80
+; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEON-NEXT:    add sp, sp, #4
+; LE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; LE-I64-NEON-LABEL: lrint_v32fp128:
+; LE-I64-NEON:       @ %bb.0:
+; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEON-NEXT:    .pad #4
+; LE-I64-NEON-NEXT:    sub sp, sp, #4
+; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    .pad #192
+; LE-I64-NEON-NEXT:    sub sp, sp, #192
+; LE-I64-NEON-NEXT:    str r3, [sp, #60] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    add r3, sp, #688
+; LE-I64-NEON-NEXT:    str r2, [sp, #56] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    mov r9, r0
+; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #560
+; LE-I64-NEON-NEXT:    mov r4, r0
+; LE-I64-NEON-NEXT:    str r1, [sp, #64] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT:    ldr r7, [sp, #544]
+; LE-I64-NEON-NEXT:    ldr r6, [sp, #548]
+; LE-I64-NEON-NEXT:    add lr, sp, #96
+; LE-I64-NEON-NEXT:    ldr r2, [sp, #552]
+; LE-I64-NEON-NEXT:    vmov.32 d17[1], r1
+; LE-I64-NEON-NEXT:    ldr r3, [sp, #556]
+; LE-I64-NEON-NEXT:    mov r0, r7
+; LE-I64-NEON-NEXT:    mov r1, r6
+; LE-I64-NEON-NEXT:    vorr q4, q8, q8
+; LE-I64-NEON-NEXT:    ldr r5, [sp, #528]
+; LE-I64-NEON-NEXT:    vmov.32 d17[0], r4
+; LE-I64-NEON-NEXT:    ldr r10, [sp, #304]
+; LE-I64-NEON-NEXT:    ldr r8, [sp, #368]
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #532
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    mov r11, r1
+; LE-I64-NEON-NEXT:    add lr, sp, #144
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    mov r0, r5
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #308
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    add lr, sp, #176
+; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    mov r0, r10
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #372
+; LE-I64-NEON-NEXT:    mov r10, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r8
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #404
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #400]
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #596
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #592]
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #676
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #672]
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add lr, sp, #96
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r4
+; LE-I64-NEON-NEXT:    str r1, [sp, #52] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #80
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #128
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r7
+; LE-I64-NEON-NEXT:    ldr r1, [sp, #628]
+; LE-I64-NEON-NEXT:    ldr r2, [sp, #632]
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #112
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r6
+; LE-I64-NEON-NEXT:    ldr r3, [sp, #636]
+; LE-I64-NEON-NEXT:    ldr r7, [sp, #64] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #160
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r10
+; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #176
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #144
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d18[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #624]
+; LE-I64-NEON-NEXT:    vmov.32 d16[1], r11
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r5
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #96
+; LE-I64-NEON-NEXT:    vmov.32 d19[1], r7
+; LE-I64-NEON-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #512
+; LE-I64-NEON-NEXT:    str r0, [sp, #48] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    str r1, [sp, #64] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #768
+; LE-I64-NEON-NEXT:    mov r11, r0
+; LE-I64-NEON-NEXT:    str r1, [sp, #28] @ 4-byte Spill
+; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    ldr r6, [sp, #784]
+; LE-I64-NEON-NEXT:    add r3, sp, #788
+; LE-I64-NEON-NEXT:    mov r8, r1
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    mov r0, r6
+; LE-I64-NEON-NEXT:    ldr r5, [sp, #736]
+; LE-I64-NEON-NEXT:    ldr r7, [sp, #752]
+; LE-I64-NEON-NEXT:    ldr r4, [sp, #720]
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #740
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r5
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #756
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r7
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #724
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r4
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r7
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    ldr r2, [sp, #296]
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r5
+; LE-I64-NEON-NEXT:    ldr r3, [sp, #300]
+; LE-I64-NEON-NEXT:    ldr r4, [sp, #576]
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    ldr r10, [sp, #384]
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r6
+; LE-I64-NEON-NEXT:    ldr r6, [sp, #352]
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r8
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #32
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r1
+; LE-I64-NEON-NEXT:    ldr r1, [sp, #60] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r11
+; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #176
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add lr, sp, #160
+; LE-I64-NEON-NEXT:    add r3, sp, #356
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r6
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add lr, sp, #112
+; LE-I64-NEON-NEXT:    add r3, sp, #388
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r10
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add lr, sp, #128
+; LE-I64-NEON-NEXT:    add r3, sp, #580
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r4
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add lr, sp, #80
+; LE-I64-NEON-NEXT:    add r3, sp, #708
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #704]
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r4
+; LE-I64-NEON-NEXT:    add lr, sp, #80
+; LE-I64-NEON-NEXT:    ldr r2, [sp, #52] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r6
+; LE-I64-NEON-NEXT:    ldr r6, [sp, #644]
+; LE-I64-NEON-NEXT:    ldr r3, [sp, #652]
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #128
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r7
+; LE-I64-NEON-NEXT:    ldr r4, [sp, #480]
+; LE-I64-NEON-NEXT:    ldr r7, [sp, #656]
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #112
+; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #160
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    ldr r10, [sp, #496]
+; LE-I64-NEON-NEXT:    vmov.32 d16[1], r5
+; LE-I64-NEON-NEXT:    add r5, r9, #192
+; LE-I64-NEON-NEXT:    ldr r8, [sp, #608]
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r1
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #176
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d16[1], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #640]
+; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #96
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #8
+; LE-I64-NEON-NEXT:    vmov.32 d16[1], r2
+; LE-I64-NEON-NEXT:    ldr r2, [sp, #648]
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d10, d11}, [r5:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; LE-I64-NEON-NEXT:    ldr r1, [sp, #48] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r1
+; LE-I64-NEON-NEXT:    mov r1, r6
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #660
+; LE-I64-NEON-NEXT:    mov r11, r1
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r7
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #484
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r4
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #500
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r10
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #612
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r8
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #64] @ 4-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #96
+; LE-I64-NEON-NEXT:    add r8, r9, #128
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r7
+; LE-I64-NEON-NEXT:    ldr r2, [sp, #344]
+; LE-I64-NEON-NEXT:    ldr r3, [sp, #348]
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r11
+; LE-I64-NEON-NEXT:    ldr r7, [sp, #452]
+; LE-I64-NEON-NEXT:    ldr r10, [sp, #416]
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #336]
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #64
+; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEON-NEXT:    add lr, sp, #32
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #144
+; LE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; LE-I64-NEON-NEXT:    ldr r4, [sp, #340]
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]
+; LE-I64-NEON-NEXT:    mov r1, r4
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #80
+; LE-I64-NEON-NEXT:    vmov.32 d10[1], r6
+; LE-I64-NEON-NEXT:    ldr r6, [sp, #448]
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    ldr r2, [sp, #456]
+; LE-I64-NEON-NEXT:    mov r11, r1
+; LE-I64-NEON-NEXT:    ldr r3, [sp, #460]
+; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r6
+; LE-I64-NEON-NEXT:    mov r1, r7
+; LE-I64-NEON-NEXT:    ldr r5, [sp, #432]
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #468
+; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #464]
+; LE-I64-NEON-NEXT:    mov r6, r1
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #420
+; LE-I64-NEON-NEXT:    mov r7, r1
+; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r10
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #436
+; LE-I64-NEON-NEXT:    mov r4, r1
+; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEON-NEXT:    mov r0, r5
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add r3, sp, #324
+; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEON-NEXT:    ldr r0, [sp, #320]
+; LE-I64-NEON-NEXT:    mov r5, r1
+; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEON-NEXT:    bl lrintl
+; LE-I64-NEON-NEXT:    add lr, sp, #64
+; LE-I64-NEON-NEXT:    vmov.32 d9[1], r5
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #96
+; LE-I64-NEON-NEXT:    vmov.32 d13[1], r7
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #176
+; LE-I64-NEON-NEXT:    vmov.32 d8[1], r4
+; LE-I64-NEON-NEXT:    vmov.32 d12[1], r6
+; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEON-NEXT:    add r0, r9, #64
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]
+; LE-I64-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #160
+; LE-I64-NEON-NEXT:    vmov.32 d15[1], r11
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #112
+; LE-I64-NEON-NEXT:    vmov.32 d14[1], r1
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]!
+; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r9:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    add lr, sp, #128
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]!
+; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]
+; LE-I64-NEON-NEXT:    add sp, sp, #192
+; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEON-NEXT:    add sp, sp, #4
+; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v32fp128:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEXT:    .pad #4
+; BE-I32-NEXT:    sub sp, sp, #4
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    .pad #104
+; BE-I32-NEXT:    sub sp, sp, #104
+; BE-I32-NEXT:    mov r4, r3
+; BE-I32-NEXT:    add r3, sp, #248
+; BE-I32-NEXT:    mov r8, r2
+; BE-I32-NEXT:    mov r11, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #616
+; BE-I32-NEXT:    mov r9, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #680
+; BE-I32-NEXT:    mov r5, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r7, [sp, #232]
+; BE-I32-NEXT:    add lr, sp, #72
+; BE-I32-NEXT:    ldr r1, [sp, #236]
+; BE-I32-NEXT:    vmov.32 d17[0], r0
+; BE-I32-NEXT:    ldr r2, [sp, #240]
+; BE-I32-NEXT:    ldr r3, [sp, #244]
+; BE-I32-NEXT:    mov r0, r7
+; BE-I32-NEXT:    ldr r10, [sp, #376]
+; BE-I32-NEXT:    vmov.32 d11[0], r5
+; BE-I32-NEXT:    ldr r6, [sp, #296]
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #300
+; BE-I32-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEXT:    mov r0, r6
+; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #380
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    mov r0, r10
+; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #360
+; BE-I32-NEXT:    mov r5, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d17[0], r0
+; BE-I32-NEXT:    ldr r6, [sp, #312]
+; BE-I32-NEXT:    ldr r1, [sp, #316]
+; BE-I32-NEXT:    ldr r2, [sp, #320]
+; BE-I32-NEXT:    ldr r3, [sp, #324]
+; BE-I32-NEXT:    vmov.32 d17[1], r5
+; BE-I32-NEXT:    mov r0, r6
+; BE-I32-NEXT:    ldr r7, [sp, #572]
+; BE-I32-NEXT:    vorr q4, q8, q8
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r6, [sp, #632]
+; BE-I32-NEXT:    add lr, sp, #88
+; BE-I32-NEXT:    ldr r1, [sp, #636]
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    ldr r2, [sp, #640]
+; BE-I32-NEXT:    ldr r3, [sp, #644]
+; BE-I32-NEXT:    mov r0, r6
+; BE-I32-NEXT:    ldr r5, [sp, #576]
+; BE-I32-NEXT:    vmov.32 d15[1], r9
+; BE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    ldr r3, [sp, #580]
+; BE-I32-NEXT:    ldr r0, [sp, #568]
+; BE-I32-NEXT:    mov r1, r7
+; BE-I32-NEXT:    mov r2, r5
+; BE-I32-NEXT:    vorr q6, q5, q5
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #552
+; BE-I32-NEXT:    mov r9, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #520
+; BE-I32-NEXT:    mov r5, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r6, [sp, #584]
+; BE-I32-NEXT:    add lr, sp, #8
+; BE-I32-NEXT:    ldr r1, [sp, #588]
+; BE-I32-NEXT:    vmov.32 d16[0], r0
+; BE-I32-NEXT:    ldr r2, [sp, #592]
+; BE-I32-NEXT:    ldr r3, [sp, #596]
+; BE-I32-NEXT:    mov r0, r6
+; BE-I32-NEXT:    vmov.32 d17[0], r5
+; BE-I32-NEXT:    ldr r7, [sp, #216]
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #220
+; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    mov r0, r7
+; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r2, [sp, #208]
+; BE-I32-NEXT:    mov r7, r0
+; BE-I32-NEXT:    ldr r3, [sp, #212]
+; BE-I32-NEXT:    mov r0, r8
+; BE-I32-NEXT:    mov r1, r4
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #456
+; BE-I32-NEXT:    mov r5, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r6, [sp, #328]
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    ldr r1, [sp, #332]
+; BE-I32-NEXT:    ldr r2, [sp, #336]
+; BE-I32-NEXT:    vmov.32 d14[0], r5
+; BE-I32-NEXT:    ldr r3, [sp, #340]
+; BE-I32-NEXT:    mov r0, r6
+; BE-I32-NEXT:    ldr r10, [sp, #504]
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r6, [sp, #344]
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    ldr r1, [sp, #348]
+; BE-I32-NEXT:    ldr r2, [sp, #352]
+; BE-I32-NEXT:    ldr r3, [sp, #356]
+; BE-I32-NEXT:    mov r0, r6
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    ldr r6, [sp, #600]
+; BE-I32-NEXT:    add lr, sp, #56
+; BE-I32-NEXT:    ldr r1, [sp, #604]
+; BE-I32-NEXT:    vmov.32 d14[1], r7
+; BE-I32-NEXT:    ldr r2, [sp, #608]
+; BE-I32-NEXT:    ldr r3, [sp, #612]
+; BE-I32-NEXT:    mov r0, r6
+; BE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #40
+; BE-I32-NEXT:    ldr r5, [sp, #508]
+; BE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEXT:    add lr, sp, #24
+; BE-I32-NEXT:    ldr r7, [sp, #536]
+; BE-I32-NEXT:    ldr r1, [sp, #540]
+; BE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #8
+; BE-I32-NEXT:    mov r0, r7
+; BE-I32-NEXT:    ldr r2, [sp, #544]
+; BE-I32-NEXT:    ldr r3, [sp, #548]
+; BE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEXT:    ldr r6, [sp, #512]
+; BE-I32-NEXT:    vmov.32 d13[1], r9
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r3, [sp, #516]
+; BE-I32-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEXT:    mov r0, r10
+; BE-I32-NEXT:    mov r1, r5
+; BE-I32-NEXT:    mov r2, r6
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #488
+; BE-I32-NEXT:    mov r5, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #424
+; BE-I32-NEXT:    mov r7, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r6, [sp, #264]
+; BE-I32-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEXT:    ldr r1, [sp, #268]
+; BE-I32-NEXT:    ldr r2, [sp, #272]
+; BE-I32-NEXT:    vmov.32 d11[0], r7
+; BE-I32-NEXT:    ldr r3, [sp, #276]
+; BE-I32-NEXT:    mov r0, r6
+; BE-I32-NEXT:    ldr r8, [sp, #696]
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add lr, sp, #88
+; BE-I32-NEXT:    ldr r4, [sp, #472]
+; BE-I32-NEXT:    ldr r1, [sp, #476]
+; BE-I32-NEXT:    vmov.32 d11[1], r5
+; BE-I32-NEXT:    ldr r2, [sp, #480]
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    ldr r3, [sp, #484]
+; BE-I32-NEXT:    vmov.32 d16[0], r0
+; BE-I32-NEXT:    mov r0, r4
+; BE-I32-NEXT:    ldr r6, [sp, #700]
+; BE-I32-NEXT:    ldr r7, [sp, #704]
+; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r3, [sp, #708]
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    mov r0, r8
+; BE-I32-NEXT:    mov r1, r6
+; BE-I32-NEXT:    mov r2, r7
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #648
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add lr, sp, #72
+; BE-I32-NEXT:    ldr r5, [sp, #664]
+; BE-I32-NEXT:    ldr r1, [sp, #668]
+; BE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEXT:    ldr r2, [sp, #672]
+; BE-I32-NEXT:    ldr r3, [sp, #676]
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    mov r0, r5
+; BE-I32-NEXT:    ldr r6, [sp, #444]
+; BE-I32-NEXT:    vmov.32 d9[1], r4
+; BE-I32-NEXT:    ldr r7, [sp, #448]
+; BE-I32-NEXT:    ldr r8, [sp, #412]
+; BE-I32-NEXT:    ldr r4, [sp, #416]
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    ldr r3, [sp, #452]
+; BE-I32-NEXT:    ldr r0, [sp, #440]
+; BE-I32-NEXT:    mov r1, r6
+; BE-I32-NEXT:    mov r2, r7
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEXT:    ldr r3, [sp, #420]
+; BE-I32-NEXT:    ldr r0, [sp, #408]
+; BE-I32-NEXT:    mov r1, r8
+; BE-I32-NEXT:    mov r2, r4
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #392
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add r3, sp, #284
+; BE-I32-NEXT:    ldr r7, [sp, #280]
+; BE-I32-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEXT:    mov r0, r7
+; BE-I32-NEXT:    vmov.32 d14[1], r4
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    add lr, sp, #88
+; BE-I32-NEXT:    vrev64.32 q9, q4
+; BE-I32-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEXT:    add lr, sp, #24
+; BE-I32-NEXT:    vrev64.32 q8, q7
+; BE-I32-NEXT:    vmov.32 d20[1], r0
+; BE-I32-NEXT:    add r0, r11, #64
+; BE-I32-NEXT:    vst1.32 {d10, d11}, [r0:128]!
+; BE-I32-NEXT:    vst1.32 {d12, d13}, [r0:128]!
+; BE-I32-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I32-NEXT:    add lr, sp, #40
+; BE-I32-NEXT:    vst1.32 {d22, d23}, [r0:128]!
+; BE-I32-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT:    add lr, sp, #56
+; BE-I32-NEXT:    vst1.32 {d18, d19}, [r11:128]!
+; BE-I32-NEXT:    vst1.32 {d20, d21}, [r11:128]!
+; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEXT:    vst1.32 {d18, d19}, [r11:128]!
+; BE-I32-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; BE-I32-NEXT:    add sp, sp, #104
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    add sp, sp, #4
+; BE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I64-LABEL: lrint_v32fp128:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    .pad #4
+; BE-I64-NEXT:    sub sp, sp, #4
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    .pad #152
+; BE-I64-NEXT:    sub sp, sp, #152
+; BE-I64-NEXT:    str r3, [sp, #120] @ 4-byte Spill
+; BE-I64-NEXT:    add r3, sp, #712
+; BE-I64-NEXT:    str r2, [sp, #112] @ 4-byte Spill
+; BE-I64-NEXT:    mov r9, r0
+; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    ldr r7, [sp, #648]
+; BE-I64-NEXT:    add r3, sp, #652
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    mov r0, r7
+; BE-I64-NEXT:    ldr r6, [sp, #520]
+; BE-I64-NEXT:    ldr r8, [sp, #632]
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #524
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    mov r0, r6
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #636
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    mov r0, r8
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #488]
+; BE-I64-NEXT:    vmov.32 d8[1], r4
+; BE-I64-NEXT:    ldr r1, [sp, #492]
+; BE-I64-NEXT:    ldr r2, [sp, #496]
+; BE-I64-NEXT:    vmov.32 d10[1], r7
+; BE-I64-NEXT:    ldr r3, [sp, #500]
+; BE-I64-NEXT:    vmov.32 d9[1], r5
+; BE-I64-NEXT:    vstr d8, [sp, #144] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #680
+; BE-I64-NEXT:    str r0, [sp, #104] @ 4-byte Spill
+; BE-I64-NEXT:    str r1, [sp, #88] @ 4-byte Spill
+; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #728]
+; BE-I64-NEXT:    ldr r2, [sp, #736]
+; BE-I64-NEXT:    vmov.32 d11[1], r6
+; BE-I64-NEXT:    ldr r6, [sp, #732]
+; BE-I64-NEXT:    ldr r3, [sp, #740]
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    ldr r5, [sp, #504]
+; BE-I64-NEXT:    mov r1, r6
+; BE-I64-NEXT:    ldr r7, [sp, #744]
+; BE-I64-NEXT:    ldr r4, [sp, #748]
+; BE-I64-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d16, [sp, #8] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    ldr r2, [sp, #752]
+; BE-I64-NEXT:    mov r11, r1
+; BE-I64-NEXT:    ldr r3, [sp, #756]
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    mov r0, r7
+; BE-I64-NEXT:    mov r1, r4
+; BE-I64-NEXT:    ldr r10, [sp, #552]
+; BE-I64-NEXT:    ldr r6, [sp, #664]
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #508
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    mov r0, r5
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #540
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #536]
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #556
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    mov r0, r10
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #668
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    mov r0, r6
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #700
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #696]
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
+; BE-I64-NEXT:    ldr r2, [sp, #256]
+; BE-I64-NEXT:    vmov.32 d13[1], r11
+; BE-I64-NEXT:    ldr r3, [sp, #260]
+; BE-I64-NEXT:    vmov.32 d14[1], r6
+; BE-I64-NEXT:    ldr r6, [sp, #264]
+; BE-I64-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEXT:    ldr r4, [sp, #344]
+; BE-I64-NEXT:    vmov.32 d12[1], r5
+; BE-I64-NEXT:    ldr r5, [sp, #312]
+; BE-I64-NEXT:    vmov.32 d8[1], r8
+; BE-I64-NEXT:    ldr r8, [sp, #328]
+; BE-I64-NEXT:    vmov.32 d10[1], r7
+; BE-I64-NEXT:    vstr d13, [sp, #32] @ 8-byte Spill
+; BE-I64-NEXT:    vmov.32 d11[1], r1
+; BE-I64-NEXT:    ldr r1, [sp, #120] @ 4-byte Reload
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
+; BE-I64-NEXT:    vstr d14, [sp] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d9, [sp, #16] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d12, [sp, #56] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d10, [sp, #64] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d8, [sp, #40] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #268
+; BE-I64-NEXT:    mov r11, r1
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    mov r0, r6
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #316
+; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    mov r0, r5
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #332
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    mov r0, r8
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #348
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    mov r0, r4
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #364
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #360]
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #476
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #472]
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; BE-I64-NEXT:    ldr r2, [sp, #592]
+; BE-I64-NEXT:    vldr d20, [sp, #136] @ 8-byte Reload
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    ldr r1, [sp, #588]
+; BE-I64-NEXT:    ldr r3, [sp, #596]
+; BE-I64-NEXT:    vldr d22, [sp, #24] @ 8-byte Reload
+; BE-I64-NEXT:    vldr d18, [sp, #8] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d21, d20
+; BE-I64-NEXT:    vmov.32 d10[1], r6
+; BE-I64-NEXT:    ldr r6, [sp, #600]
+; BE-I64-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEXT:    ldr r4, [sp, #616]
+; BE-I64-NEXT:    vmov.32 d12[1], r7
+; BE-I64-NEXT:    ldr r7, [sp, #604]
+; BE-I64-NEXT:    vmov.32 d8[1], r10
+; BE-I64-NEXT:    add r10, r9, #192
+; BE-I64-NEXT:    vmov.32 d14[1], r11
+; BE-I64-NEXT:    ldr r11, [sp, #440]
+; BE-I64-NEXT:    vmov.32 d13[1], r0
+; BE-I64-NEXT:    ldr r0, [sp, #584]
+; BE-I64-NEXT:    vmov.32 d15[1], r5
+; BE-I64-NEXT:    vstr d16, [sp, #48] @ 8-byte Spill
+; BE-I64-NEXT:    vldr d16, [sp, #128] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d20, d22
+; BE-I64-NEXT:    vldr d22, [sp] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d19, d18
+; BE-I64-NEXT:    vrev64.32 d17, d16
+; BE-I64-NEXT:    vrev64.32 d18, d22
+; BE-I64-NEXT:    vstr d10, [sp, #120] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d9, [sp, #112] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d15, [sp, #104] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d12, [sp, #96] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d8, [sp, #80] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d14, [sp, #72] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d13, [sp, #88] @ 8-byte Spill
+; BE-I64-NEXT:    vst1.64 {d20, d21}, [r10:128]!
+; BE-I64-NEXT:    vrev64.32 d16, d11
+; BE-I64-NEXT:    vst1.64 {d18, d19}, [r10:128]!
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r10:128]!
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    ldr r2, [sp, #608]
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    ldr r3, [sp, #612]
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    mov r0, r6
+; BE-I64-NEXT:    mov r1, r7
+; BE-I64-NEXT:    ldr r5, [sp, #456]
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #620
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    mov r0, r4
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #444
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    mov r0, r11
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #460
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    mov r0, r5
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #572
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #568]
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    vldr d16, [sp, #16] @ 8-byte Reload
+; BE-I64-NEXT:    vldr d18, [sp, #56] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d17, d16
+; BE-I64-NEXT:    ldr r2, [sp, #304]
+; BE-I64-NEXT:    vrev64.32 d16, d18
+; BE-I64-NEXT:    ldr r3, [sp, #308]
+; BE-I64-NEXT:    vldr d18, [sp, #144] @ 8-byte Reload
+; BE-I64-NEXT:    vldr d20, [sp, #64] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d19, d18
+; BE-I64-NEXT:    vrev64.32 d18, d20
+; BE-I64-NEXT:    vldr d20, [sp, #40] @ 8-byte Reload
+; BE-I64-NEXT:    vldr d22, [sp, #32] @ 8-byte Reload
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #296]
+; BE-I64-NEXT:    vmov.32 d10[1], r7
+; BE-I64-NEXT:    ldr r7, [sp, #412]
+; BE-I64-NEXT:    vmov.32 d9[1], r6
+; BE-I64-NEXT:    ldr r6, [sp, #408]
+; BE-I64-NEXT:    vmov.32 d8[1], r8
+; BE-I64-NEXT:    add r8, r9, #128
+; BE-I64-NEXT:    vrev64.32 d21, d20
+; BE-I64-NEXT:    vmov.32 d13[1], r5
+; BE-I64-NEXT:    ldr r5, [sp, #300]
+; BE-I64-NEXT:    vrev64.32 d20, d22
+; BE-I64-NEXT:    vmov.32 d14[1], r1
+; BE-I64-NEXT:    mov r1, r5
+; BE-I64-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
+; BE-I64-NEXT:    vst1.64 {d20, d21}, [r10:128]
+; BE-I64-NEXT:    vst1.64 {d18, d19}, [r8:128]!
+; BE-I64-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEXT:    ldr r4, [sp, #424]
+; BE-I64-NEXT:    ldr r10, [sp, #376]
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    ldr r2, [sp, #416]
+; BE-I64-NEXT:    mov r11, r1
+; BE-I64-NEXT:    ldr r3, [sp, #420]
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    mov r0, r6
+; BE-I64-NEXT:    mov r1, r7
+; BE-I64-NEXT:    ldr r5, [sp, #392]
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #428
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    mov r0, r4
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #380
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    mov r0, r10
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #396
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    mov r0, r5
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    add r3, sp, #284
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #280]
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    bl lrintl
+; BE-I64-NEXT:    vldr d16, [sp, #120] @ 8-byte Reload
+; BE-I64-NEXT:    vldr d18, [sp, #112] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d17, d16
+; BE-I64-NEXT:    vldr d26, [sp, #136] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d16, d18
+; BE-I64-NEXT:    vldr d18, [sp, #104] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d31, d26
+; BE-I64-NEXT:    vldr d26, [sp, #128] @ 8-byte Reload
+; BE-I64-NEXT:    vldr d20, [sp, #96] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d19, d18
+; BE-I64-NEXT:    vrev64.32 d18, d20
+; BE-I64-NEXT:    vldr d20, [sp, #80] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d30, d26
+; BE-I64-NEXT:    vldr d26, [sp, #24] @ 8-byte Reload
+; BE-I64-NEXT:    vmov.32 d10[1], r5
+; BE-I64-NEXT:    vldr d22, [sp, #72] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d21, d20
+; BE-I64-NEXT:    vrev64.32 d1, d26
+; BE-I64-NEXT:    vmov.32 d9[1], r7
+; BE-I64-NEXT:    vmov.32 d12[1], r4
+; BE-I64-NEXT:    vrev64.32 d20, d22
+; BE-I64-NEXT:    vldr d22, [sp, #88] @ 8-byte Reload
+; BE-I64-NEXT:    vmov.32 d8[1], r6
+; BE-I64-NEXT:    vrev64.32 d0, d14
+; BE-I64-NEXT:    vmov.32 d28[0], r0
+; BE-I64-NEXT:    add r0, r9, #64
+; BE-I64-NEXT:    vrev64.32 d3, d10
+; BE-I64-NEXT:    vldr d24, [sp, #48] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d23, d22
+; BE-I64-NEXT:    vrev64.32 d5, d9
+; BE-I64-NEXT:    vst1.64 {d0, d1}, [r8:128]!
+; BE-I64-NEXT:    vrev64.32 d2, d12
+; BE-I64-NEXT:    vmov.32 d15[1], r11
+; BE-I64-NEXT:    vrev64.32 d22, d24
+; BE-I64-NEXT:    vrev64.32 d25, d13
+; BE-I64-NEXT:    vrev64.32 d4, d8
+; BE-I64-NEXT:    vst1.64 {d30, d31}, [r8:128]
+; BE-I64-NEXT:    vst1.64 {d2, d3}, [r0:128]!
+; BE-I64-NEXT:    vmov.32 d28[1], r1
+; BE-I64-NEXT:    vrev64.32 d24, d11
+; BE-I64-NEXT:    vst1.64 {d4, d5}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 d27, d15
+; BE-I64-NEXT:    vst1.64 {d24, d25}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 d26, d28
+; BE-I64-NEXT:    vst1.64 {d22, d23}, [r0:128]
+; BE-I64-NEXT:    vst1.64 {d20, d21}, [r9:128]!
+; BE-I64-NEXT:    vst1.64 {d26, d27}, [r9:128]!
+; BE-I64-NEXT:    vst1.64 {d18, d19}, [r9:128]!
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r9:128]
+; BE-I64-NEXT:    add sp, sp, #152
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    add sp, sp, #4
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-NEON-LABEL: lrint_v32fp128:
+; BE-I32-NEON:       @ %bb.0:
+; BE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I32-NEON-NEXT:    .pad #4
+; BE-I32-NEON-NEXT:    sub sp, sp, #4
+; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    .pad #104
+; BE-I32-NEON-NEXT:    sub sp, sp, #104
+; BE-I32-NEON-NEXT:    mov r4, r3
+; BE-I32-NEON-NEXT:    add r3, sp, #248
+; BE-I32-NEON-NEXT:    mov r8, r2
+; BE-I32-NEON-NEXT:    mov r11, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #616
+; BE-I32-NEON-NEXT:    mov r9, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #680
+; BE-I32-NEON-NEXT:    mov r5, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r7, [sp, #232]
+; BE-I32-NEON-NEXT:    add lr, sp, #72
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #236]
+; BE-I32-NEON-NEXT:    vmov.32 d17[0], r0
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #240]
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #244]
+; BE-I32-NEON-NEXT:    mov r0, r7
+; BE-I32-NEON-NEXT:    ldr r10, [sp, #376]
+; BE-I32-NEON-NEXT:    vmov.32 d11[0], r5
+; BE-I32-NEON-NEXT:    ldr r6, [sp, #296]
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #300
+; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT:    mov r0, r6
+; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #380
+; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEON-NEXT:    mov r0, r10
+; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #360
+; BE-I32-NEON-NEXT:    mov r5, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    vmov.32 d17[0], r0
+; BE-I32-NEON-NEXT:    ldr r6, [sp, #312]
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #316]
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #320]
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #324]
+; BE-I32-NEON-NEXT:    vmov.32 d17[1], r5
+; BE-I32-NEON-NEXT:    mov r0, r6
+; BE-I32-NEON-NEXT:    ldr r7, [sp, #572]
+; BE-I32-NEON-NEXT:    vorr q4, q8, q8
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r6, [sp, #632]
+; BE-I32-NEON-NEXT:    add lr, sp, #88
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #636]
+; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #640]
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #644]
+; BE-I32-NEON-NEXT:    mov r0, r6
+; BE-I32-NEON-NEXT:    ldr r5, [sp, #576]
+; BE-I32-NEON-NEXT:    vmov.32 d15[1], r9
+; BE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #580]
+; BE-I32-NEON-NEXT:    ldr r0, [sp, #568]
+; BE-I32-NEON-NEXT:    mov r1, r7
+; BE-I32-NEON-NEXT:    mov r2, r5
+; BE-I32-NEON-NEXT:    vorr q6, q5, q5
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #552
+; BE-I32-NEON-NEXT:    mov r9, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #520
+; BE-I32-NEON-NEXT:    mov r5, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r6, [sp, #584]
+; BE-I32-NEON-NEXT:    add lr, sp, #8
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #588]
+; BE-I32-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #592]
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #596]
+; BE-I32-NEON-NEXT:    mov r0, r6
+; BE-I32-NEON-NEXT:    vmov.32 d17[0], r5
+; BE-I32-NEON-NEXT:    ldr r7, [sp, #216]
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #220
+; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEON-NEXT:    mov r0, r7
+; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #208]
+; BE-I32-NEON-NEXT:    mov r7, r0
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #212]
+; BE-I32-NEON-NEXT:    mov r0, r8
+; BE-I32-NEON-NEXT:    mov r1, r4
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #456
+; BE-I32-NEON-NEXT:    mov r5, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r6, [sp, #328]
+; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #332]
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #336]
+; BE-I32-NEON-NEXT:    vmov.32 d14[0], r5
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #340]
+; BE-I32-NEON-NEXT:    mov r0, r6
+; BE-I32-NEON-NEXT:    ldr r10, [sp, #504]
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r6, [sp, #344]
+; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #348]
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #352]
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #356]
+; BE-I32-NEON-NEXT:    mov r0, r6
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT:    ldr r6, [sp, #600]
+; BE-I32-NEON-NEXT:    add lr, sp, #56
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #604]
+; BE-I32-NEON-NEXT:    vmov.32 d14[1], r7
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #608]
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #612]
+; BE-I32-NEON-NEXT:    mov r0, r6
+; BE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #40
+; BE-I32-NEON-NEXT:    ldr r5, [sp, #508]
+; BE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT:    add lr, sp, #24
+; BE-I32-NEON-NEXT:    ldr r7, [sp, #536]
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #540]
+; BE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    add lr, sp, #8
+; BE-I32-NEON-NEXT:    mov r0, r7
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #544]
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #548]
+; BE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    ldr r6, [sp, #512]
+; BE-I32-NEON-NEXT:    vmov.32 d13[1], r9
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #516]
+; BE-I32-NEON-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEON-NEXT:    mov r0, r10
+; BE-I32-NEON-NEXT:    mov r1, r5
+; BE-I32-NEON-NEXT:    mov r2, r6
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #488
+; BE-I32-NEON-NEXT:    mov r5, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #424
+; BE-I32-NEON-NEXT:    mov r7, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r6, [sp, #264]
+; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #268]
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #272]
+; BE-I32-NEON-NEXT:    vmov.32 d11[0], r7
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #276]
+; BE-I32-NEON-NEXT:    mov r0, r6
+; BE-I32-NEON-NEXT:    ldr r8, [sp, #696]
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add lr, sp, #88
+; BE-I32-NEON-NEXT:    ldr r4, [sp, #472]
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #476]
+; BE-I32-NEON-NEXT:    vmov.32 d11[1], r5
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #480]
+; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #484]
+; BE-I32-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I32-NEON-NEXT:    mov r0, r4
+; BE-I32-NEON-NEXT:    ldr r6, [sp, #700]
+; BE-I32-NEON-NEXT:    ldr r7, [sp, #704]
+; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #708]
+; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEON-NEXT:    mov r0, r8
+; BE-I32-NEON-NEXT:    mov r1, r6
+; BE-I32-NEON-NEXT:    mov r2, r7
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #648
+; BE-I32-NEON-NEXT:    mov r4, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add lr, sp, #72
+; BE-I32-NEON-NEXT:    ldr r5, [sp, #664]
+; BE-I32-NEON-NEXT:    ldr r1, [sp, #668]
+; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    ldr r2, [sp, #672]
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #676]
+; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEON-NEXT:    mov r0, r5
+; BE-I32-NEON-NEXT:    ldr r6, [sp, #444]
+; BE-I32-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-I32-NEON-NEXT:    ldr r7, [sp, #448]
+; BE-I32-NEON-NEXT:    ldr r8, [sp, #412]
+; BE-I32-NEON-NEXT:    ldr r4, [sp, #416]
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #452]
+; BE-I32-NEON-NEXT:    ldr r0, [sp, #440]
+; BE-I32-NEON-NEXT:    mov r1, r6
+; BE-I32-NEON-NEXT:    mov r2, r7
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEON-NEXT:    ldr r3, [sp, #420]
+; BE-I32-NEON-NEXT:    ldr r0, [sp, #408]
+; BE-I32-NEON-NEXT:    mov r1, r8
+; BE-I32-NEON-NEXT:    mov r2, r4
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #392
+; BE-I32-NEON-NEXT:    mov r4, r0
+; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add r3, sp, #284
+; BE-I32-NEON-NEXT:    ldr r7, [sp, #280]
+; BE-I32-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEON-NEXT:    mov r0, r7
+; BE-I32-NEON-NEXT:    vmov.32 d14[1], r4
+; BE-I32-NEON-NEXT:    bl lrintl
+; BE-I32-NEON-NEXT:    add lr, sp, #88
+; BE-I32-NEON-NEXT:    vrev64.32 q9, q4
+; BE-I32-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    add lr, sp, #24
+; BE-I32-NEON-NEXT:    vrev64.32 q8, q7
+; BE-I32-NEON-NEXT:    vmov.32 d20[1], r0
+; BE-I32-NEON-NEXT:    add r0, r11, #64
+; BE-I32-NEON-NEXT:    vst1.32 {d10, d11}, [r0:128]!
+; BE-I32-NEON-NEXT:    vst1.32 {d12, d13}, [r0:128]!
+; BE-I32-NEON-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    add lr, sp, #40
+; BE-I32-NEON-NEXT:    vst1.32 {d22, d23}, [r0:128]!
+; BE-I32-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    add lr, sp, #56
+; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r11:128]!
+; BE-I32-NEON-NEXT:    vst1.32 {d20, d21}, [r11:128]!
+; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r11:128]!
+; BE-I32-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; BE-I32-NEON-NEXT:    add sp, sp, #104
+; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEON-NEXT:    add sp, sp, #4
+; BE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I64-NEON-LABEL: lrint_v32fp128:
+; BE-I64-NEON:       @ %bb.0:
+; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEON-NEXT:    .pad #4
+; BE-I64-NEON-NEXT:    sub sp, sp, #4
+; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    .pad #152
+; BE-I64-NEON-NEXT:    sub sp, sp, #152
+; BE-I64-NEON-NEXT:    str r3, [sp, #120] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    add r3, sp, #712
+; BE-I64-NEON-NEXT:    str r2, [sp, #112] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    mov r9, r0
+; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    ldr r7, [sp, #648]
+; BE-I64-NEON-NEXT:    add r3, sp, #652
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    mov r0, r7
+; BE-I64-NEON-NEXT:    ldr r6, [sp, #520]
+; BE-I64-NEON-NEXT:    ldr r8, [sp, #632]
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #524
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r6
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #636
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r8
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #488]
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r4
+; BE-I64-NEON-NEXT:    ldr r1, [sp, #492]
+; BE-I64-NEON-NEXT:    ldr r2, [sp, #496]
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r7
+; BE-I64-NEON-NEXT:    ldr r3, [sp, #500]
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r5
+; BE-I64-NEON-NEXT:    vstr d8, [sp, #144] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #680
+; BE-I64-NEON-NEXT:    str r0, [sp, #104] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    str r1, [sp, #88] @ 4-byte Spill
+; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #728]
+; BE-I64-NEON-NEXT:    ldr r2, [sp, #736]
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r6
+; BE-I64-NEON-NEXT:    ldr r6, [sp, #732]
+; BE-I64-NEON-NEXT:    ldr r3, [sp, #740]
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT:    ldr r5, [sp, #504]
+; BE-I64-NEON-NEXT:    mov r1, r6
+; BE-I64-NEON-NEXT:    ldr r7, [sp, #744]
+; BE-I64-NEON-NEXT:    ldr r4, [sp, #748]
+; BE-I64-NEON-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d16, [sp, #8] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    ldr r2, [sp, #752]
+; BE-I64-NEON-NEXT:    mov r11, r1
+; BE-I64-NEON-NEXT:    ldr r3, [sp, #756]
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r7
+; BE-I64-NEON-NEXT:    mov r1, r4
+; BE-I64-NEON-NEXT:    ldr r10, [sp, #552]
+; BE-I64-NEON-NEXT:    ldr r6, [sp, #664]
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #508
+; BE-I64-NEON-NEXT:    mov r8, r1
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r5
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #540
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #536]
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #556
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r10
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #668
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r6
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #700
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #696]
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    ldr r2, [sp, #256]
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r11
+; BE-I64-NEON-NEXT:    ldr r3, [sp, #260]
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r6
+; BE-I64-NEON-NEXT:    ldr r6, [sp, #264]
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT:    ldr r4, [sp, #344]
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r5
+; BE-I64-NEON-NEXT:    ldr r5, [sp, #312]
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r8
+; BE-I64-NEON-NEXT:    ldr r8, [sp, #328]
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r7
+; BE-I64-NEON-NEXT:    vstr d13, [sp, #32] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r1
+; BE-I64-NEON-NEXT:    ldr r1, [sp, #120] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    vstr d14, [sp] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d9, [sp, #16] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d12, [sp, #56] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d10, [sp, #64] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d8, [sp, #40] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #268
+; BE-I64-NEON-NEXT:    mov r11, r1
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r6
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #316
+; BE-I64-NEON-NEXT:    mov r10, r1
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r5
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #332
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r8
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #348
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r4
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #364
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #360]
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #476
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #472]
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; BE-I64-NEON-NEXT:    ldr r2, [sp, #592]
+; BE-I64-NEON-NEXT:    vldr d20, [sp, #136] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEON-NEXT:    ldr r1, [sp, #588]
+; BE-I64-NEON-NEXT:    ldr r3, [sp, #596]
+; BE-I64-NEON-NEXT:    vldr d22, [sp, #24] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vldr d18, [sp, #8] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d21, d20
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r6
+; BE-I64-NEON-NEXT:    ldr r6, [sp, #600]
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEON-NEXT:    ldr r4, [sp, #616]
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r7
+; BE-I64-NEON-NEXT:    ldr r7, [sp, #604]
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r10
+; BE-I64-NEON-NEXT:    add r10, r9, #192
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r11
+; BE-I64-NEON-NEXT:    ldr r11, [sp, #440]
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #584]
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r5
+; BE-I64-NEON-NEXT:    vstr d16, [sp, #48] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vldr d16, [sp, #128] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d20, d22
+; BE-I64-NEON-NEXT:    vldr d22, [sp] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d19, d18
+; BE-I64-NEON-NEXT:    vrev64.32 d17, d16
+; BE-I64-NEON-NEXT:    vrev64.32 d18, d22
+; BE-I64-NEON-NEXT:    vstr d10, [sp, #120] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d9, [sp, #112] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d15, [sp, #104] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d12, [sp, #96] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d8, [sp, #80] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d14, [sp, #72] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d13, [sp, #88] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r10:128]!
+; BE-I64-NEON-NEXT:    vrev64.32 d16, d11
+; BE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r10:128]!
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]!
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    ldr r2, [sp, #608]
+; BE-I64-NEON-NEXT:    mov r8, r1
+; BE-I64-NEON-NEXT:    ldr r3, [sp, #612]
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r6
+; BE-I64-NEON-NEXT:    mov r1, r7
+; BE-I64-NEON-NEXT:    ldr r5, [sp, #456]
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #620
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r4
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #444
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r11
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #460
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r5
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #572
+; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #568]
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    vldr d16, [sp, #16] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vldr d18, [sp, #56] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d17, d16
+; BE-I64-NEON-NEXT:    ldr r2, [sp, #304]
+; BE-I64-NEON-NEXT:    vrev64.32 d16, d18
+; BE-I64-NEON-NEXT:    ldr r3, [sp, #308]
+; BE-I64-NEON-NEXT:    vldr d18, [sp, #144] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vldr d20, [sp, #64] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d19, d18
+; BE-I64-NEON-NEXT:    vrev64.32 d18, d20
+; BE-I64-NEON-NEXT:    vldr d20, [sp, #40] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vldr d22, [sp, #32] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #296]
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r7
+; BE-I64-NEON-NEXT:    ldr r7, [sp, #412]
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r6
+; BE-I64-NEON-NEXT:    ldr r6, [sp, #408]
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r8
+; BE-I64-NEON-NEXT:    add r8, r9, #128
+; BE-I64-NEON-NEXT:    vrev64.32 d21, d20
+; BE-I64-NEON-NEXT:    vmov.32 d13[1], r5
+; BE-I64-NEON-NEXT:    ldr r5, [sp, #300]
+; BE-I64-NEON-NEXT:    vrev64.32 d20, d22
+; BE-I64-NEON-NEXT:    vmov.32 d14[1], r1
+; BE-I64-NEON-NEXT:    mov r1, r5
+; BE-I64-NEON-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
+; BE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r10:128]
+; BE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r8:128]!
+; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
+; BE-I64-NEON-NEXT:    ldr r4, [sp, #424]
+; BE-I64-NEON-NEXT:    ldr r10, [sp, #376]
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    ldr r2, [sp, #416]
+; BE-I64-NEON-NEXT:    mov r11, r1
+; BE-I64-NEON-NEXT:    ldr r3, [sp, #420]
+; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r6
+; BE-I64-NEON-NEXT:    mov r1, r7
+; BE-I64-NEON-NEXT:    ldr r5, [sp, #392]
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #428
+; BE-I64-NEON-NEXT:    mov r6, r1
+; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r4
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #380
+; BE-I64-NEON-NEXT:    mov r7, r1
+; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r10
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #396
+; BE-I64-NEON-NEXT:    mov r4, r1
+; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEON-NEXT:    mov r0, r5
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    add r3, sp, #284
+; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEON-NEXT:    ldr r0, [sp, #280]
+; BE-I64-NEON-NEXT:    mov r5, r1
+; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEON-NEXT:    bl lrintl
+; BE-I64-NEON-NEXT:    vldr d16, [sp, #120] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vldr d18, [sp, #112] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d17, d16
+; BE-I64-NEON-NEXT:    vldr d26, [sp, #136] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d16, d18
+; BE-I64-NEON-NEXT:    vldr d18, [sp, #104] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d31, d26
+; BE-I64-NEON-NEXT:    vldr d26, [sp, #128] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vldr d20, [sp, #96] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d19, d18
+; BE-I64-NEON-NEXT:    vrev64.32 d18, d20
+; BE-I64-NEON-NEXT:    vldr d20, [sp, #80] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d30, d26
+; BE-I64-NEON-NEXT:    vldr d26, [sp, #24] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d10[1], r5
+; BE-I64-NEON-NEXT:    vldr d22, [sp, #72] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d21, d20
+; BE-I64-NEON-NEXT:    vrev64.32 d1, d26
+; BE-I64-NEON-NEXT:    vmov.32 d9[1], r7
+; BE-I64-NEON-NEXT:    vmov.32 d12[1], r4
+; BE-I64-NEON-NEXT:    vrev64.32 d20, d22
+; BE-I64-NEON-NEXT:    vldr d22, [sp, #88] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vmov.32 d8[1], r6
+; BE-I64-NEON-NEXT:    vrev64.32 d0, d14
+; BE-I64-NEON-NEXT:    vmov.32 d28[0], r0
+; BE-I64-NEON-NEXT:    add r0, r9, #64
+; BE-I64-NEON-NEXT:    vrev64.32 d3, d10
+; BE-I64-NEON-NEXT:    vldr d24, [sp, #48] @ 8-byte Reload
+; BE-I64-NEON-NEXT:    vrev64.32 d23, d22
+; BE-I64-NEON-NEXT:    vrev64.32 d5, d9
+; BE-I64-NEON-NEXT:    vst1.64 {d0, d1}, [r8:128]!
+; BE-I64-NEON-NEXT:    vrev64.32 d2, d12
+; BE-I64-NEON-NEXT:    vmov.32 d15[1], r11
+; BE-I64-NEON-NEXT:    vrev64.32 d22, d24
+; BE-I64-NEON-NEXT:    vrev64.32 d25, d13
+; BE-I64-NEON-NEXT:    vrev64.32 d4, d8
+; BE-I64-NEON-NEXT:    vst1.64 {d30, d31}, [r8:128]
+; BE-I64-NEON-NEXT:    vst1.64 {d2, d3}, [r0:128]!
+; BE-I64-NEON-NEXT:    vmov.32 d28[1], r1
+; BE-I64-NEON-NEXT:    vrev64.32 d24, d11
+; BE-I64-NEON-NEXT:    vst1.64 {d4, d5}, [r0:128]!
+; BE-I64-NEON-NEXT:    vrev64.32 d27, d15
+; BE-I64-NEON-NEXT:    vst1.64 {d24, d25}, [r0:128]!
+; BE-I64-NEON-NEXT:    vrev64.32 d26, d28
+; BE-I64-NEON-NEXT:    vst1.64 {d22, d23}, [r0:128]
+; BE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r9:128]!
+; BE-I64-NEON-NEXT:    vst1.64 {d26, d27}, [r9:128]!
+; BE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r9:128]!
+; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]
+; BE-I64-NEON-NEXT:    add sp, sp, #152
+; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEON-NEXT:    add sp, sp, #4
+; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128> %x)
+  ret <32 x iXLen> %a
+}
+declare <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128>)

>From 9aadce5ec090e3a403f516031d807639f4da2524 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 8 Aug 2025 07:27:23 -0500
Subject: [PATCH 5/7] nounwind for vector tests since cfi directives are
 causing CI failures

---
 .../AArch64/sve-fixed-vector-llrint.ll        |   76 +-
 .../CodeGen/AArch64/sve-fixed-vector-lrint.ll |  170 +--
 llvm/test/CodeGen/AArch64/vector-llrint.ll    |   73 +-
 llvm/test/CodeGen/AArch64/vector-lrint.ll     |   95 +-
 llvm/test/CodeGen/PowerPC/vector-llrint.ll    |  916 ++++---------
 llvm/test/CodeGen/PowerPC/vector-lrint.ll     | 1213 +++++------------
 llvm/test/CodeGen/X86/vector-llrint-f16.ll    |   12 +-
 llvm/test/CodeGen/X86/vector-llrint.ll        |  134 +-
 llvm/test/CodeGen/X86/vector-lrint-f16.ll     |   12 +-
 llvm/test/CodeGen/X86/vector-lrint.ll         |  259 +---
 10 files changed, 760 insertions(+), 2200 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
index 838aac0edcb73..38ba9240d15b4 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -aarch64-sve-vector-bits-min=256 | FileCheck %s
 
-define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
+define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v1i64_v1f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx h0, h0
@@ -13,7 +13,7 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
 }
 declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
 
-define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
+define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v1i64_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
@@ -30,7 +30,7 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
 }
 declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>)
 
-define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
+define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v4i64_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx v0.4h, v0.4h
@@ -51,7 +51,7 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 }
 declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>)
 
-define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
+define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v8i64_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
@@ -85,7 +85,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>)
 
-define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
+define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v16i64_v16f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
@@ -144,16 +144,13 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 }
 declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>)
 
-define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
+define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v32i64_v32f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-NEXT:    sub x9, sp, #272
 ; CHECK-NEXT:    mov x29, sp
 ; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
-; CHECK-NEXT:    .cfi_def_cfa w29, 16
-; CHECK-NEXT:    .cfi_offset w30, -8
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    frintx v5.4h, v0.4h
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECK-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
@@ -278,7 +275,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 }
 declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
 
-define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) nounwind {
 ; CHECK-LABEL: llrint_v1i64_v1f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
@@ -291,7 +288,7 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
 }
 declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
 
-define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) nounwind {
 ; CHECK-LABEL: llrint_v2i64_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx v0.2s, v0.2s
@@ -303,7 +300,7 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
 }
 declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
 
-define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) nounwind {
 ; CHECK-LABEL: llrint_v4i64_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx v0.4s, v0.4s
@@ -324,7 +321,7 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
 }
 declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
 
-define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind {
 ; CHECK-LABEL: llrint_v8i64_v8f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx v0.4s, v0.4s
@@ -357,7 +354,7 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
 
-define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) nounwind {
 ; CHECK-LABEL: llrint_v16i64_v16f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx v3.4s, v3.4s
@@ -414,16 +411,13 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
 }
 declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
 
-define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
+define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) nounwind {
 ; CHECK-LABEL: llrint_v32i64_v32f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-NEXT:    sub x9, sp, #272
 ; CHECK-NEXT:    mov x29, sp
 ; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
-; CHECK-NEXT:    .cfi_def_cfa w29, 16
-; CHECK-NEXT:    .cfi_offset w30, -8
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    frintx v0.4s, v0.4s
 ; CHECK-NEXT:    frintx v1.4s, v1.4s
 ; CHECK-NEXT:    frintx v2.4s, v2.4s
@@ -544,7 +538,7 @@ define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
 }
 declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>)
 
-define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
+define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind {
 ; CHECK-LABEL: llrint_v1i64_v1f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx d0, d0
@@ -556,7 +550,7 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
 }
 declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
 
-define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
+define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) nounwind {
 ; CHECK-LABEL: llrint_v2i64_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx v0.2d, v0.2d
@@ -567,7 +561,7 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
 }
 declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
 
-define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
+define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind {
 ; CHECK-LABEL: llrint_v4i64_v4f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
@@ -593,7 +587,7 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
 }
 declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
 
-define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
+define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind {
 ; CHECK-LABEL: llrint_v8i64_v8f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
@@ -635,7 +629,7 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
 
-define <16 x i64> @llrint_v16f64(<16 x double> %x) {
+define <16 x i64> @llrint_v16f64(<16 x double> %x) nounwind {
 ; CHECK-LABEL: llrint_v16f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.d, vl2
@@ -708,16 +702,13 @@ define <16 x i64> @llrint_v16f64(<16 x double> %x) {
 }
 declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>)
 
-define <32 x i64> @llrint_v32f64(<32 x double> %x) {
+define <32 x i64> @llrint_v32f64(<32 x double> %x) nounwind {
 ; CHECK-LABEL: llrint_v32f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-NEXT:    sub x9, sp, #272
 ; CHECK-NEXT:    mov x29, sp
 ; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
-; CHECK-NEXT:    .cfi_def_cfa w29, 16
-; CHECK-NEXT:    .cfi_offset w30, -8
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p1.d, vl2
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
@@ -862,12 +853,10 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) {
 }
 declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>)
 
-define <1 x i64> @llrint_v1i64_v1fp128(<1 x fp128> %x) {
+define <1 x i64> @llrint_v1i64_v1fp128(<1 x fp128> %x) nounwind {
 ; CHECK-LABEL: llrint_v1i64_v1fp128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl llrintl
 ; CHECK-NEXT:    fmov d0, x0
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -877,15 +866,13 @@ define <1 x i64> @llrint_v1i64_v1fp128(<1 x fp128> %x) {
 }
 declare <1 x i64> @llvm.llrint.v1i64.v1fp128(<1 x fp128>)
 
-define <2 x i64> @llrint_v2i64_v2fp128(<2 x fp128> %x) {
+define <2 x i64> @llrint_v2i64_v2fp128(<2 x fp128> %x) nounwind {
 ; CHECK-LABEL: llrint_v2i64_v2fp128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl llrintl
 ; CHECK-NEXT:    fmov d0, x0
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
@@ -902,15 +889,12 @@ define <2 x i64> @llrint_v2i64_v2fp128(<2 x fp128> %x) {
 }
 declare <2 x i64> @llvm.llrint.v2i64.v2fp128(<2 x fp128>)
 
-define <4 x i64> @llrint_v4i64_v4fp128(<4 x fp128> %x) {
+define <4 x i64> @llrint_v4i64_v4fp128(<4 x fp128> %x) nounwind {
 ; CHECK-LABEL: llrint_v4i64_v4fp128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #64
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 80 + 8 * VG
-; CHECK-NEXT:    .cfi_offset w30, -8
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov v0.16b, v3.16b
 ; CHECK-NEXT:    stp q2, q1, [sp, #16] // 32-byte Folded Spill
@@ -950,15 +934,12 @@ define <4 x i64> @llrint_v4i64_v4fp128(<4 x fp128> %x) {
 }
 declare <4 x i64> @llvm.llrint.v4i64.v4fp128(<4 x fp128>)
 
-define <8 x i64> @llrint_v8i64_v8fp128(<8 x fp128> %x) {
+define <8 x i64> @llrint_v8i64_v8fp128(<8 x fp128> %x) nounwind {
 ; CHECK-LABEL: llrint_v8i64_v8fp128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #128
 ; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 16 * VG
-; CHECK-NEXT:    .cfi_offset w30, -8
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov v0.16b, v7.16b
 ; CHECK-NEXT:    stp q6, q5, [sp, #16] // 32-byte Folded Spill
@@ -1030,15 +1011,12 @@ define <8 x i64> @llrint_v8i64_v8fp128(<8 x fp128> %x) {
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8fp128(<8 x fp128>)
 
-define <16 x i64> @llrint_v16fp128(<16 x fp128> %x) {
+define <16 x i64> @llrint_v16fp128(<16 x fp128> %x) nounwind {
 ; CHECK-LABEL: llrint_v16fp128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #256
 ; CHECK-NEXT:    addvl sp, sp, #-4
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x02, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 272 + 32 * VG
-; CHECK-NEXT:    .cfi_offset w30, -8
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    addvl x8, sp, #4
 ; CHECK-NEXT:    str q1, [sp, #240] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q1, [x8, #272]
@@ -1194,17 +1172,13 @@ define <16 x i64> @llrint_v16fp128(<16 x fp128> %x) {
 }
 declare <16 x i64> @llvm.llrint.v16i64.v16fp128(<16 x fp128>)
 
-define <32 x i64> @llrint_v32fp128(<32 x fp128> %x) {
+define <32 x i64> @llrint_v32fp128(<32 x fp128> %x) nounwind {
 ; CHECK-LABEL: llrint_v32fp128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #512
 ; CHECK-NEXT:    addvl sp, sp, #-8
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xa0, 0x04, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 544 + 64 * VG
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    .cfi_offset w29, -32
 ; CHECK-NEXT:    addvl x9, sp, #8
 ; CHECK-NEXT:    stp q2, q1, [sp, #16] // 32-byte Folded Spill
 ; CHECK-NEXT:    mov x19, x8
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
index 0b5e27f9fe15d..175f4993d06c9 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
@@ -4,7 +4,7 @@
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=aarch64 -mattr=+sve \
 ; RUN:   -aarch64-sve-vector-bits-min=256 | FileCheck --check-prefixes=CHECK-i64 %s
 
-define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+define <1 x iXLen> @lrint_v1f16(<1 x half> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v1f16:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx h0, h0
@@ -23,7 +23,7 @@ define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
 }
 declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
 
-define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+define <2 x iXLen> @lrint_v2f16(<2 x half> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v2f16:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 def $q0
@@ -53,7 +53,7 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 }
 declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
 
-define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+define <4 x iXLen> @lrint_v4f16(<4 x half> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v4f16:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v0.4h, v0.4h
@@ -81,7 +81,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
 }
 declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>)
 
-define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
+define <8 x iXLen> @lrint_v8f16(<8 x half> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v8f16:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v2.8h, v0.8h
@@ -143,7 +143,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
 }
 declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>)
 
-define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
+define <16 x iXLen> @lrint_v16f16(<16 x half> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v16f16:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v1.8h, v1.8h
@@ -254,26 +254,17 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 }
 declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
 
-define <32 x iXLen> @lrint_v32f16(<32 x half> %x) {
+define <32 x iXLen> @lrint_v32f16(<32 x half> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v32f16:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    stp x26, x25, [sp, #-64]! // 16-byte Folded Spill
-; CHECK-i32-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    .cfi_def_cfa_offset 64
-; CHECK-i32-NEXT:    .cfi_offset w19, -8
-; CHECK-i32-NEXT:    .cfi_offset w20, -16
-; CHECK-i32-NEXT:    .cfi_offset w21, -24
-; CHECK-i32-NEXT:    .cfi_offset w22, -32
-; CHECK-i32-NEXT:    .cfi_offset w23, -40
-; CHECK-i32-NEXT:    .cfi_offset w24, -48
-; CHECK-i32-NEXT:    .cfi_offset w25, -56
-; CHECK-i32-NEXT:    .cfi_offset w26, -64
 ; CHECK-i32-NEXT:    frintx v3.8h, v3.8h
 ; CHECK-i32-NEXT:    frintx v2.8h, v2.8h
+; CHECK-i32-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    frintx v1.8h, v1.8h
 ; CHECK-i32-NEXT:    frintx v0.8h, v0.8h
+; CHECK-i32-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    mov h4, v3.h[7]
 ; CHECK-i32-NEXT:    mov h5, v3.h[6]
 ; CHECK-i32-NEXT:    mov h6, v3.h[5]
@@ -378,9 +369,6 @@ define <32 x iXLen> @lrint_v32f16(<32 x half> %x) {
 ; CHECK-i64-NEXT:    sub x9, sp, #272
 ; CHECK-i64-NEXT:    mov x29, sp
 ; CHECK-i64-NEXT:    and sp, x9, #0xffffffffffffffe0
-; CHECK-i64-NEXT:    .cfi_def_cfa w29, 16
-; CHECK-i64-NEXT:    .cfi_offset w30, -8
-; CHECK-i64-NEXT:    .cfi_offset w29, -16
 ; CHECK-i64-NEXT:    frintx v5.4h, v0.4h
 ; CHECK-i64-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECK-i64-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
@@ -505,7 +493,7 @@ define <32 x iXLen> @lrint_v32f16(<32 x half> %x) {
 }
 declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>)
 
-define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
+define <1 x iXLen> @lrint_v1f32(<1 x float> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v1f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v0.2s, v0.2s
@@ -524,7 +512,7 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 }
 declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>)
 
-define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
+define <2 x iXLen> @lrint_v2f32(<2 x float> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v2f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v0.2s, v0.2s
@@ -542,7 +530,7 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
 }
 declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>)
 
-define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
+define <4 x iXLen> @lrint_v4f32(<4 x float> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v4f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v0.4s, v0.4s
@@ -569,7 +557,7 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
 }
 declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>)
 
-define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
+define <8 x iXLen> @lrint_v8f32(<8 x float> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v8f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p0.d, vl2
@@ -636,7 +624,7 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
 }
 declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>)
 
-define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
+define <16 x iXLen> @lrint_v16f32(<16 x float> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v16f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p0.d, vl2
@@ -754,24 +742,10 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
 }
 declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
 
-define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
+define <32 x iXLen> @lrint_v32f32(<32 x float> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v32f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    str x27, [sp, #-80]! // 8-byte Folded Spill
-; CHECK-i32-NEXT:    stp x26, x25, [sp, #16] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    stp x24, x23, [sp, #32] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-i32-NEXT:    .cfi_offset w19, -8
-; CHECK-i32-NEXT:    .cfi_offset w20, -16
-; CHECK-i32-NEXT:    .cfi_offset w21, -24
-; CHECK-i32-NEXT:    .cfi_offset w22, -32
-; CHECK-i32-NEXT:    .cfi_offset w23, -40
-; CHECK-i32-NEXT:    .cfi_offset w24, -48
-; CHECK-i32-NEXT:    .cfi_offset w25, -56
-; CHECK-i32-NEXT:    .cfi_offset w26, -64
-; CHECK-i32-NEXT:    .cfi_offset w27, -80
 ; CHECK-i32-NEXT:    ptrue p1.d, vl2
 ; CHECK-i32-NEXT:    // kill: def $q6 killed $q6 def $z6
 ; CHECK-i32-NEXT:    // kill: def $q7 killed $q7 def $z7
@@ -781,11 +755,15 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
 ; CHECK-i32-NEXT:    // kill: def $q5 killed $q5 def $z5
 ; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-i32-NEXT:    stp x24, x23, [sp, #32] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    ptrue p0.s, vl8
+; CHECK-i32-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    splice z6.d, p1, z6.d, z7.d
 ; CHECK-i32-NEXT:    splice z2.d, p1, z2.d, z3.d
 ; CHECK-i32-NEXT:    splice z4.d, p1, z4.d, z5.d
 ; CHECK-i32-NEXT:    splice z0.d, p1, z0.d, z1.d
+; CHECK-i32-NEXT:    stp x26, x25, [sp, #16] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    movprfx z3, z6
 ; CHECK-i32-NEXT:    frintx z3.s, p0/m, z6.s
 ; CHECK-i32-NEXT:    frintx z2.s, p0/m, z2.s
@@ -897,9 +875,6 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
 ; CHECK-i64-NEXT:    sub x9, sp, #272
 ; CHECK-i64-NEXT:    mov x29, sp
 ; CHECK-i64-NEXT:    and sp, x9, #0xffffffffffffffe0
-; CHECK-i64-NEXT:    .cfi_def_cfa w29, 16
-; CHECK-i64-NEXT:    .cfi_offset w30, -8
-; CHECK-i64-NEXT:    .cfi_offset w29, -16
 ; CHECK-i64-NEXT:    frintx v0.4s, v0.4s
 ; CHECK-i64-NEXT:    frintx v1.4s, v1.4s
 ; CHECK-i64-NEXT:    frintx v2.4s, v2.4s
@@ -1020,7 +995,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
 }
 declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float>)
 
-define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
+define <1 x iXLen> @lrint_v1f64(<1 x double> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v1f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx d0, d0
@@ -1039,7 +1014,7 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
 }
 declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
 
-define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
+define <2 x iXLen> @lrint_v2f64(<2 x double> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v2f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v0.2d, v0.2d
@@ -1061,7 +1036,7 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
 }
 declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
 
-define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
+define <4 x iXLen> @lrint_v4f64(<4 x double> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v4f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p0.d, vl2
@@ -1109,7 +1084,7 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
 }
 declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)
 
-define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
+define <8 x iXLen> @lrint_v8f64(<8 x double> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v8f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p0.d, vl2
@@ -1188,7 +1163,7 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
 }
 declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
 
-define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
+define <16 x iXLen> @lrint_v16f64(<16 x double> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v16f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p1.d, vl2
@@ -1329,7 +1304,7 @@ define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
 }
 declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>)
 
-define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
+define <32 x iXLen> @lrint_v32f64(<32 x double> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v32f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p1.d, vl2
@@ -1465,9 +1440,6 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
 ; CHECK-i64-NEXT:    sub x9, sp, #272
 ; CHECK-i64-NEXT:    mov x29, sp
 ; CHECK-i64-NEXT:    and sp, x9, #0xffffffffffffffe0
-; CHECK-i64-NEXT:    .cfi_def_cfa w29, 16
-; CHECK-i64-NEXT:    .cfi_offset w30, -8
-; CHECK-i64-NEXT:    .cfi_offset w29, -16
 ; CHECK-i64-NEXT:    ptrue p1.d, vl2
 ; CHECK-i64-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-i64-NEXT:    // kill: def $q1 killed $q1 def $z1
@@ -1612,12 +1584,10 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
 }
 declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>)
 
-define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
+define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v1fp128:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-i32-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-i32-NEXT:    .cfi_offset w30, -16
 ; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    fmov s0, w0
 ; CHECK-i32-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -1626,8 +1596,6 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
 ; CHECK-i64-LABEL: lrint_v1fp128:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-i64-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-i64-NEXT:    .cfi_offset w30, -16
 ; CHECK-i64-NEXT:    bl lrintl
 ; CHECK-i64-NEXT:    fmov d0, x0
 ; CHECK-i64-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -1637,13 +1605,11 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
 }
 declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>)
 
-define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
+define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v2fp128:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    sub sp, sp, #48
 ; CHECK-i32-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-i32-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-i32-NEXT:    .cfi_offset w30, -16
 ; CHECK-i32-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    fmov s0, w0
@@ -1660,11 +1626,9 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
 ; CHECK-i64-LABEL: lrint_v2fp128:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    sub sp, sp, #48
-; CHECK-i64-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-i64-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-i64-NEXT:    .cfi_offset w30, -16
 ; CHECK-i64-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    mov v0.16b, v1.16b
+; CHECK-i64-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-i64-NEXT:    bl lrintl
 ; CHECK-i64-NEXT:    fmov d0, x0
 ; CHECK-i64-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
@@ -1681,13 +1645,11 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
 }
 declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>)
 
-define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
+define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v4fp128:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    sub sp, sp, #80
 ; CHECK-i32-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-i32-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-i32-NEXT:    .cfi_offset w30, -16
 ; CHECK-i32-NEXT:    stp q2, q3, [sp, #16] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    bl lrintl
@@ -1716,9 +1678,6 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
 ; CHECK-i64-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    sub sp, sp, #64
 ; CHECK-i64-NEXT:    addvl sp, sp, #-1
-; CHECK-i64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 80 + 8 * VG
-; CHECK-i64-NEXT:    .cfi_offset w30, -8
-; CHECK-i64-NEXT:    .cfi_offset w29, -16
 ; CHECK-i64-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    mov v0.16b, v3.16b
 ; CHECK-i64-NEXT:    stp q2, q1, [sp, #16] // 32-byte Folded Spill
@@ -1758,25 +1717,16 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
 }
 declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>)
 
-define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
+define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v8fp128:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    sub sp, sp, #176
+; CHECK-i32-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
+; CHECK-i32-NEXT:    mov v0.16b, v7.16b
 ; CHECK-i32-NEXT:    stp x30, x25, [sp, #112] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    .cfi_def_cfa_offset 176
-; CHECK-i32-NEXT:    .cfi_offset w19, -8
-; CHECK-i32-NEXT:    .cfi_offset w20, -16
-; CHECK-i32-NEXT:    .cfi_offset w21, -24
-; CHECK-i32-NEXT:    .cfi_offset w22, -32
-; CHECK-i32-NEXT:    .cfi_offset w23, -40
-; CHECK-i32-NEXT:    .cfi_offset w24, -48
-; CHECK-i32-NEXT:    .cfi_offset w25, -56
-; CHECK-i32-NEXT:    .cfi_offset w30, -64
-; CHECK-i32-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    mov v0.16b, v7.16b
 ; CHECK-i32-NEXT:    stp q6, q5, [sp] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    stp q4, q3, [sp, #32] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    stp q2, q1, [sp, #64] // 32-byte Folded Spill
@@ -1822,9 +1772,6 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
 ; CHECK-i64-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    sub sp, sp, #128
 ; CHECK-i64-NEXT:    addvl sp, sp, #-2
-; CHECK-i64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x01, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 144 + 16 * VG
-; CHECK-i64-NEXT:    .cfi_offset w30, -8
-; CHECK-i64-NEXT:    .cfi_offset w29, -16
 ; CHECK-i64-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    mov v0.16b, v7.16b
 ; CHECK-i64-NEXT:    stp q6, q5, [sp, #16] // 32-byte Folded Spill
@@ -1896,48 +1843,35 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
 }
 declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>)
 
-define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) {
+define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v16fp128:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    sub sp, sp, #368
-; CHECK-i32-NEXT:    stp x29, x30, [sp, #272] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    stp x28, x27, [sp, #288] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    stp x26, x25, [sp, #304] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    stp x24, x23, [sp, #320] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    stp x22, x21, [sp, #336] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    stp x20, x19, [sp, #352] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    .cfi_def_cfa_offset 368
-; CHECK-i32-NEXT:    .cfi_offset w19, -8
-; CHECK-i32-NEXT:    .cfi_offset w20, -16
-; CHECK-i32-NEXT:    .cfi_offset w21, -24
-; CHECK-i32-NEXT:    .cfi_offset w22, -32
-; CHECK-i32-NEXT:    .cfi_offset w23, -40
-; CHECK-i32-NEXT:    .cfi_offset w24, -48
-; CHECK-i32-NEXT:    .cfi_offset w25, -56
-; CHECK-i32-NEXT:    .cfi_offset w26, -64
-; CHECK-i32-NEXT:    .cfi_offset w27, -72
-; CHECK-i32-NEXT:    .cfi_offset w28, -80
-; CHECK-i32-NEXT:    .cfi_offset w30, -88
-; CHECK-i32-NEXT:    .cfi_offset w29, -96
-; CHECK-i32-NEXT:    stp q7, q6, [sp, #80] // 32-byte Folded Spill
-; CHECK-i32-NEXT:    stp q5, q4, [sp, #112] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    stp q3, q0, [sp, #144] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    stp q2, q1, [sp, #176] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #368]
+; CHECK-i32-NEXT:    stp x29, x30, [sp, #272] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #384]
+; CHECK-i32-NEXT:    stp x28, x27, [sp, #288] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #400]
+; CHECK-i32-NEXT:    stp x26, x25, [sp, #304] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #416]
+; CHECK-i32-NEXT:    stp x24, x23, [sp, #320] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    str q1, [sp, #208] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #432]
+; CHECK-i32-NEXT:    stp x22, x21, [sp, #336] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #448]
+; CHECK-i32-NEXT:    stp x20, x19, [sp, #352] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    str q1, [sp, #224] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #464]
+; CHECK-i32-NEXT:    stp q7, q6, [sp, #80] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    str q1, [sp, #240] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #480]
+; CHECK-i32-NEXT:    stp q5, q4, [sp, #112] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    mov v0.16b, v1.16b
 ; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    ldr q0, [sp, #240] // 16-byte Folded Reload
@@ -2019,9 +1953,6 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) {
 ; CHECK-i64-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    sub sp, sp, #256
 ; CHECK-i64-NEXT:    addvl sp, sp, #-4
-; CHECK-i64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x90, 0x02, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 272 + 32 * VG
-; CHECK-i64-NEXT:    .cfi_offset w30, -8
-; CHECK-i64-NEXT:    .cfi_offset w29, -16
 ; CHECK-i64-NEXT:    addvl x8, sp, #4
 ; CHECK-i64-NEXT:    str q1, [sp, #240] // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    ldr q1, [x8, #272]
@@ -2177,7 +2108,7 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) {
 }
 declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>)
 
-define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) {
+define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v32fp128:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
@@ -2187,19 +2118,6 @@ define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) {
 ; CHECK-i32-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    sub sp, sp, #528
-; CHECK-i32-NEXT:    .cfi_def_cfa_offset 624
-; CHECK-i32-NEXT:    .cfi_offset w19, -8
-; CHECK-i32-NEXT:    .cfi_offset w20, -16
-; CHECK-i32-NEXT:    .cfi_offset w21, -24
-; CHECK-i32-NEXT:    .cfi_offset w22, -32
-; CHECK-i32-NEXT:    .cfi_offset w23, -40
-; CHECK-i32-NEXT:    .cfi_offset w24, -48
-; CHECK-i32-NEXT:    .cfi_offset w25, -56
-; CHECK-i32-NEXT:    .cfi_offset w26, -64
-; CHECK-i32-NEXT:    .cfi_offset w27, -72
-; CHECK-i32-NEXT:    .cfi_offset w28, -80
-; CHECK-i32-NEXT:    .cfi_offset w30, -88
-; CHECK-i32-NEXT:    .cfi_offset w29, -96
 ; CHECK-i32-NEXT:    stp q2, q1, [sp, #368] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #624]
 ; CHECK-i32-NEXT:    str q0, [sp, #224] // 16-byte Folded Spill
@@ -2412,10 +2330,6 @@ define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) {
 ; CHECK-i64-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    sub sp, sp, #512
 ; CHECK-i64-NEXT:    addvl sp, sp, #-8
-; CHECK-i64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x8f, 0x00, 0x11, 0xa0, 0x04, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 544 + 64 * VG
-; CHECK-i64-NEXT:    .cfi_offset w19, -8
-; CHECK-i64-NEXT:    .cfi_offset w30, -16
-; CHECK-i64-NEXT:    .cfi_offset w29, -32
 ; CHECK-i64-NEXT:    addvl x9, sp, #8
 ; CHECK-i64-NEXT:    stp q2, q1, [sp, #16] // 32-byte Folded Spill
 ; CHECK-i64-NEXT:    mov x19, x8
diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll
index 9e6f46df05fec..8f139cc225a67 100644
--- a/llvm/test/CodeGen/AArch64/vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+neon | FileCheck %s
 
-define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
+define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v1i64_v1f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvt s0, h0
@@ -14,7 +14,7 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
 }
 declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
 
-define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
+define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v1i64_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
@@ -33,7 +33,7 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
 }
 declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>)
 
-define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
+define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v4i64_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
@@ -62,7 +62,7 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 }
 declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>)
 
-define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
+define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v8i64_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
@@ -110,7 +110,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>)
 
-define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
+define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v16i64_v16f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
@@ -197,7 +197,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 }
 declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>)
 
-define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
+define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; CHECK-LABEL: llrint_v32i64_v32f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
@@ -370,7 +370,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 }
 declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
 
-define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) nounwind {
 ; CHECK-LABEL: llrint_v1i64_v1f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
@@ -383,7 +383,7 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
 }
 declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
 
-define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) nounwind {
 ; CHECK-LABEL: llrint_v2i64_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx v0.2s, v0.2s
@@ -395,7 +395,7 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
 }
 declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
 
-define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) nounwind {
 ; CHECK-LABEL: llrint_v4i64_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
@@ -411,7 +411,7 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
 }
 declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
 
-define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind {
 ; CHECK-LABEL: llrint_v8i64_v8f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
@@ -434,7 +434,7 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
 
-define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) nounwind {
 ; CHECK-LABEL: llrint_v16i64_v16f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
@@ -471,7 +471,7 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
 }
 declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
 
-define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
+define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) nounwind {
 ; CHECK-LABEL: llrint_v32i64_v32f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v16.16b, v7.16b, v7.16b, #8
@@ -544,7 +544,7 @@ define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
 }
 declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>)
 
-define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
+define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind {
 ; CHECK-LABEL: llrint_v1i64_v1f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx d0, d0
@@ -556,7 +556,7 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
 }
 declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
 
-define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
+define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) nounwind {
 ; CHECK-LABEL: llrint_v2i64_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx v0.2d, v0.2d
@@ -567,7 +567,7 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
 }
 declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
 
-define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
+define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind {
 ; CHECK-LABEL: llrint_v4i64_v4f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx v0.2d, v0.2d
@@ -580,7 +580,7 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
 }
 declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
 
-define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
+define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind {
 ; CHECK-LABEL: llrint_v8i64_v8f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx v0.2d, v0.2d
@@ -597,7 +597,7 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
 
-define <16 x i64> @llrint_v16f64(<16 x double> %x) {
+define <16 x i64> @llrint_v16f64(<16 x double> %x) nounwind {
 ; CHECK-LABEL: llrint_v16f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    frintx v0.2d, v0.2d
@@ -622,7 +622,7 @@ define <16 x i64> @llrint_v16f64(<16 x double> %x) {
 }
 declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>)
 
-define <32 x i64> @llrint_v32f64(<32 x double> %x) {
+define <32 x i64> @llrint_v32f64(<32 x double> %x) nounwind {
 ; CHECK-LABEL: llrint_v32f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q17, q16, [sp, #96]
@@ -675,12 +675,10 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) {
 }
 declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>)
 
-define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
+define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) nounwind {
 ; CHECK-LABEL: llrint_v1i64_v1f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl llrintl
 ; CHECK-NEXT:    fmov d0, x0
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -690,15 +688,13 @@ define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
 }
 declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>)
 
-define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
+define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) nounwind {
 ; CHECK-LABEL: llrint_v2i64_v2f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl llrintl
 ; CHECK-NEXT:    fmov d0, x0
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
@@ -715,15 +711,13 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
 }
 declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>)
 
-define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
+define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) nounwind {
 ; CHECK-LABEL: llrint_v4i64_v4f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp q3, q2, [sp, #32] // 32-byte Folded Spill
 ; CHECK-NEXT:    bl llrintl
 ; CHECK-NEXT:    fmov d0, x0
@@ -751,15 +745,13 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
 }
 declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>)
 
-define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
+define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) nounwind {
 ; CHECK-LABEL: llrint_v8i64_v8f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #144
-; CHECK-NEXT:    str x30, [sp, #128] // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 144
-; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    str x30, [sp, #128] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp q3, q2, [sp, #16] // 32-byte Folded Spill
 ; CHECK-NEXT:    stp q5, q4, [sp, #48] // 32-byte Folded Spill
 ; CHECK-NEXT:    stp q7, q6, [sp, #96] // 32-byte Folded Spill
@@ -811,23 +803,20 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>)
 
-define <16 x i64> @llrint_v16f128(<16 x fp128> %x) {
+define <16 x i64> @llrint_v16f128(<16 x fp128> %x) nounwind {
 ; CHECK-LABEL: llrint_v16f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #272
-; CHECK-NEXT:    stp x29, x30, [sp, #256] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 272
-; CHECK-NEXT:    .cfi_offset w30, -8
-; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    str q2, [sp, #160] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q2, [sp, #368]
 ; CHECK-NEXT:    stp q0, q3, [sp] // 32-byte Folded Spill
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    str q2, [sp, #240] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q2, [sp, #384]
-; CHECK-NEXT:    stp q5, q7, [sp, #32] // 32-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #256] // 16-byte Folded Spill
 ; CHECK-NEXT:    str q2, [sp, #224] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q2, [sp, #336]
+; CHECK-NEXT:    stp q5, q7, [sp, #32] // 32-byte Folded Spill
 ; CHECK-NEXT:    str q2, [sp, #192] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q2, [sp, #352]
 ; CHECK-NEXT:    str q2, [sp, #176] // 16-byte Folded Spill
@@ -929,16 +918,12 @@ define <16 x i64> @llrint_v16f128(<16 x fp128> %x) {
 }
 declare <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128>)
 
-define <32 x i64> @llrint_v32f128(<32 x fp128> %x) {
+define <32 x i64> @llrint_v32f128(<32 x fp128> %x) nounwind {
 ; CHECK-LABEL: llrint_v32f128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #512
-; CHECK-NEXT:    .cfi_def_cfa_offset 544
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    .cfi_offset w29, -32
 ; CHECK-NEXT:    str q0, [sp, #464] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #896]
 ; CHECK-NEXT:    mov x19, x8
diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index cb7fe14273a42..b899db839a65a 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -45,7 +45,7 @@
 ; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v16f64
 ; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v32f64
 
-define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+define <1 x iXLen> @lrint_v1f16(<1 x half> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v1f16:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    fcvt s0, h0
@@ -66,7 +66,7 @@ define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
 }
 declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
 
-define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+define <2 x iXLen> @lrint_v2f16(<2 x half> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v2f16:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 def $q0
@@ -100,7 +100,7 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 }
 declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
 
-define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+define <4 x iXLen> @lrint_v4f16(<4 x half> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v4f16:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 def $q0
@@ -153,7 +153,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
 }
 declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>)
 
-define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
+define <8 x iXLen> @lrint_v8f16(<8 x half> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v8f16:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
@@ -244,7 +244,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
 }
 declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>)
 
-define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
+define <16 x iXLen> @lrint_v16f16(<16 x half> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v16f16:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
@@ -413,7 +413,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 }
 declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
 
-define <32 x iXLen> @lrint_v32f16(<32 x half> %x) {
+define <32 x iXLen> @lrint_v32f16(<32 x half> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v32f16:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ext v5.16b, v0.16b, v0.16b, #8
@@ -748,7 +748,7 @@ define <32 x iXLen> @lrint_v32f16(<32 x half> %x) {
 }
 declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>)
 
-define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
+define <1 x iXLen> @lrint_v1f32(<1 x float> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v1f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v0.2s, v0.2s
@@ -774,7 +774,7 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 }
 declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>)
 
-define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
+define <2 x iXLen> @lrint_v2f32(<2 x float> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v2f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v0.2s, v0.2s
@@ -792,7 +792,7 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
 }
 declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>)
 
-define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
+define <4 x iXLen> @lrint_v4f32(<4 x float> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v4f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v0.4s, v0.4s
@@ -814,7 +814,7 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
 }
 declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>)
 
-define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
+define <8 x iXLen> @lrint_v8f32(<8 x float> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v8f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v0.4s, v0.4s
@@ -845,7 +845,7 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
 }
 declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>)
 
-define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
+define <16 x iXLen> @lrint_v16f32(<16 x float> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v16f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v0.4s, v0.4s
@@ -894,7 +894,7 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
 }
 declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
 
-define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
+define <32 x iXLen> @lrint_v32f32(<32 x float> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v32f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v0.4s, v0.4s
@@ -987,7 +987,7 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
 }
 declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float>)
 
-define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
+define <1 x iXLen> @lrint_v1f64(<1 x double> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v1f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx d0, d0
@@ -1006,7 +1006,7 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
 }
 declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
 
-define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
+define <2 x iXLen> @lrint_v2f64(<2 x double> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v2f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v0.2d, v0.2d
@@ -1028,7 +1028,7 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
 }
 declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
 
-define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
+define <4 x iXLen> @lrint_v4f64(<4 x double> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v4f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v0.2d, v0.2d
@@ -1057,7 +1057,7 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
 }
 declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)
 
-define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
+define <8 x iXLen> @lrint_v8f64(<8 x double> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v8f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v2.2d, v2.2d
@@ -1102,7 +1102,7 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
 }
 declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
 
-define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
+define <16 x iXLen> @lrint_v16f64(<16 x double> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v16f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v0.2d, v0.2d
@@ -1179,7 +1179,7 @@ define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
 }
 declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>)
 
-define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
+define <32 x iXLen> @lrint_v32f64(<32 x double> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v32f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    frintx v17.2d, v0.2d
@@ -1336,12 +1336,10 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
 }
 declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>)
 
-define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
+define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v1fp128:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-i32-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-i32-NEXT:    .cfi_offset w30, -16
 ; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    fmov s0, w0
 ; CHECK-i32-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -1350,8 +1348,6 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
 ; CHECK-i64-LABEL: lrint_v1fp128:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-i64-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-i64-NEXT:    .cfi_offset w30, -16
 ; CHECK-i64-NEXT:    bl lrintl
 ; CHECK-i64-NEXT:    fmov d0, x0
 ; CHECK-i64-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
@@ -1361,13 +1357,11 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
 }
 declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>)
 
-define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
+define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v2fp128:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    sub sp, sp, #48
 ; CHECK-i32-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-i32-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-i32-NEXT:    .cfi_offset w30, -16
 ; CHECK-i32-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    fmov s0, w0
@@ -1384,11 +1378,9 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
 ; CHECK-i64-LABEL: lrint_v2fp128:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    sub sp, sp, #48
-; CHECK-i64-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-i64-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-i64-NEXT:    .cfi_offset w30, -16
 ; CHECK-i64-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    mov v0.16b, v1.16b
+; CHECK-i64-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-i64-NEXT:    bl lrintl
 ; CHECK-i64-NEXT:    fmov d0, x0
 ; CHECK-i64-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
@@ -1405,13 +1397,11 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
 }
 declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>)
 
-define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
+define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v4fp128:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    sub sp, sp, #80
 ; CHECK-i32-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-i32-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-i32-NEXT:    .cfi_offset w30, -16
 ; CHECK-i32-NEXT:    stp q2, q3, [sp, #16] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    bl lrintl
@@ -1438,11 +1428,9 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
 ; CHECK-i64-LABEL: lrint_v4fp128:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    sub sp, sp, #80
-; CHECK-i64-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-i64-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-i64-NEXT:    .cfi_offset w30, -16
 ; CHECK-i64-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    mov v0.16b, v1.16b
+; CHECK-i64-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
 ; CHECK-i64-NEXT:    stp q3, q2, [sp, #32] // 32-byte Folded Spill
 ; CHECK-i64-NEXT:    bl lrintl
 ; CHECK-i64-NEXT:    fmov d0, x0
@@ -1470,17 +1458,15 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
 }
 declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>)
 
-define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
+define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v8fp128:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    sub sp, sp, #144
 ; CHECK-i32-NEXT:    str x30, [sp, #128] // 8-byte Folded Spill
-; CHECK-i32-NEXT:    .cfi_def_cfa_offset 144
-; CHECK-i32-NEXT:    .cfi_offset w30, -16
+; CHECK-i32-NEXT:    str q4, [sp, #96] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    stp q1, q2, [sp] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    stp q3, q5, [sp, #32] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    stp q6, q7, [sp, #64] // 32-byte Folded Spill
-; CHECK-i32-NEXT:    str q4, [sp, #96] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    bl lrintl
 ; CHECK-i32-NEXT:    fmov s0, w0
 ; CHECK-i32-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
@@ -1524,11 +1510,9 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
 ; CHECK-i64-LABEL: lrint_v8fp128:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    sub sp, sp, #144
-; CHECK-i64-NEXT:    str x30, [sp, #128] // 8-byte Folded Spill
-; CHECK-i64-NEXT:    .cfi_def_cfa_offset 144
-; CHECK-i64-NEXT:    .cfi_offset w30, -16
 ; CHECK-i64-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    mov v0.16b, v1.16b
+; CHECK-i64-NEXT:    str x30, [sp, #128] // 8-byte Folded Spill
 ; CHECK-i64-NEXT:    stp q3, q2, [sp, #16] // 32-byte Folded Spill
 ; CHECK-i64-NEXT:    stp q5, q4, [sp, #48] // 32-byte Folded Spill
 ; CHECK-i64-NEXT:    stp q7, q6, [sp, #96] // 32-byte Folded Spill
@@ -1580,22 +1564,19 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
 }
 declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>)
 
-define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) {
+define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v16fp128:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    sub sp, sp, #272
-; CHECK-i32-NEXT:    stp x29, x30, [sp, #256] // 16-byte Folded Spill
-; CHECK-i32-NEXT:    .cfi_def_cfa_offset 272
-; CHECK-i32-NEXT:    .cfi_offset w30, -8
-; CHECK-i32-NEXT:    .cfi_offset w29, -16
 ; CHECK-i32-NEXT:    stp q1, q2, [sp] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #384]
-; CHECK-i32-NEXT:    stp q3, q5, [sp, #32] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    stp x29, x30, [sp, #256] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    str q1, [sp, #176] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #368]
-; CHECK-i32-NEXT:    stp q7, q4, [sp, #208] // 32-byte Folded Spill
+; CHECK-i32-NEXT:    stp q3, q5, [sp, #32] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    str q1, [sp, #160] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #352]
+; CHECK-i32-NEXT:    stp q7, q4, [sp, #208] // 32-byte Folded Spill
 ; CHECK-i32-NEXT:    str q1, [sp, #144] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #336]
 ; CHECK-i32-NEXT:    str q1, [sp, #192] // 16-byte Folded Spill
@@ -1689,19 +1670,16 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) {
 ; CHECK-i64-LABEL: lrint_v16fp128:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    sub sp, sp, #272
-; CHECK-i64-NEXT:    stp x29, x30, [sp, #256] // 16-byte Folded Spill
-; CHECK-i64-NEXT:    .cfi_def_cfa_offset 272
-; CHECK-i64-NEXT:    .cfi_offset w30, -8
-; CHECK-i64-NEXT:    .cfi_offset w29, -16
 ; CHECK-i64-NEXT:    str q2, [sp, #160] // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    ldr q2, [sp, #368]
 ; CHECK-i64-NEXT:    stp q0, q3, [sp] // 32-byte Folded Spill
 ; CHECK-i64-NEXT:    mov v0.16b, v1.16b
 ; CHECK-i64-NEXT:    str q2, [sp, #240] // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    ldr q2, [sp, #384]
-; CHECK-i64-NEXT:    stp q5, q7, [sp, #32] // 32-byte Folded Spill
+; CHECK-i64-NEXT:    stp x29, x30, [sp, #256] // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    str q2, [sp, #224] // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    ldr q2, [sp, #336]
+; CHECK-i64-NEXT:    stp q5, q7, [sp, #32] // 32-byte Folded Spill
 ; CHECK-i64-NEXT:    str q2, [sp, #192] // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    ldr q2, [sp, #352]
 ; CHECK-i64-NEXT:    str q2, [sp, #176] // 16-byte Folded Spill
@@ -1803,14 +1781,11 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) {
 }
 declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>)
 
-define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) {
+define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) nounwind {
 ; CHECK-i32-LABEL: lrint_v32fp128:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    sub sp, sp, #512
-; CHECK-i32-NEXT:    .cfi_def_cfa_offset 528
-; CHECK-i32-NEXT:    .cfi_offset w30, -8
-; CHECK-i32-NEXT:    .cfi_offset w29, -16
 ; CHECK-i32-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-i32-NEXT:    ldr q1, [sp, #896]
 ; CHECK-i32-NEXT:    stp q2, q3, [sp, #16] // 32-byte Folded Spill
@@ -2026,10 +2001,6 @@ define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) {
 ; CHECK-i64-NEXT:    str x29, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-i64-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    sub sp, sp, #512
-; CHECK-i64-NEXT:    .cfi_def_cfa_offset 544
-; CHECK-i64-NEXT:    .cfi_offset w19, -8
-; CHECK-i64-NEXT:    .cfi_offset w30, -16
-; CHECK-i64-NEXT:    .cfi_offset w29, -32
 ; CHECK-i64-NEXT:    str q0, [sp, #464] // 16-byte Folded Spill
 ; CHECK-i64-NEXT:    ldr q0, [sp, #896]
 ; CHECK-i64-NEXT:    mov x19, x8
diff --git a/llvm/test/CodeGen/PowerPC/vector-llrint.ll b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
index d57bf6b2e706c..8a9e48e002381 100644
--- a/llvm/test/CodeGen/PowerPC/vector-llrint.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-llrint.ll
@@ -13,14 +13,12 @@
 ; RUN:   -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s \
 ; RUN:   --enable-unsafe-fp-math | FileCheck %s --check-prefix=FAST
 
-define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
+define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
 ; BE-LABEL: llrint_v1i64_v1f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -112(r1)
 ; BE-NEXT:    std r0, 128(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 112
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl __truncsfhf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    clrldi r3, r3, 48
@@ -38,8 +36,6 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -32(r1)
 ; CHECK-NEXT:    std r0, 48(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl __truncsfhf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    clrldi r3, r3, 48
@@ -57,8 +53,6 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -32(r1)
 ; FAST-NEXT:    std r0, 48(r1)
-; FAST-NEXT:    .cfi_def_cfa_offset 32
-; FAST-NEXT:    .cfi_offset lr, 16
 ; FAST-NEXT:    bl __truncsfhf2
 ; FAST-NEXT:    nop
 ; FAST-NEXT:    clrldi r3, r3, 48
@@ -75,16 +69,12 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
 }
 declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
 
-define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
+define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind {
 ; BE-LABEL: llrint_v1i64_v2f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -160(r1)
 ; BE-NEXT:    std r0, 176(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 160
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset r30, -24
-; BE-NEXT:    .cfi_offset f31, -8
 ; BE-NEXT:    stfd f31, 152(r1) # 8-byte Folded Spill
 ; BE-NEXT:    fmr f31, f1
 ; BE-NEXT:    fmr f1, f2
@@ -122,17 +112,12 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -96(r1)
-; CHECK-NEXT:    std r0, 112(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset r30, -24
-; CHECK-NEXT:    .cfi_offset f31, -8
-; CHECK-NEXT:    .cfi_offset v31, -48
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    std r0, 112(r1)
 ; CHECK-NEXT:    std r30, 72(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f31, 88(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f31, f2
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __truncsfhf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    fmr f1, f31
@@ -157,7 +142,7 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
 ; CHECK-NEXT:    lfd f31, 88(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r30, 72(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    xxmrghd v2, vs0, v31
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 96
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -166,10 +151,6 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
 ; FAST-LABEL: llrint_v1i64_v2f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    .cfi_def_cfa_offset 48
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset f30, -16
-; FAST-NEXT:    .cfi_offset f31, -8
 ; FAST-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stdu r1, -48(r1)
@@ -206,20 +187,12 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
 }
 declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>)
 
-define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
+define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind {
 ; BE-LABEL: llrint_v4i64_v4f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -208(r1)
 ; BE-NEXT:    std r0, 224(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 208
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset r28, -56
-; BE-NEXT:    .cfi_offset r29, -48
-; BE-NEXT:    .cfi_offset r30, -40
-; BE-NEXT:    .cfi_offset f29, -24
-; BE-NEXT:    .cfi_offset f30, -16
-; BE-NEXT:    .cfi_offset f31, -8
 ; BE-NEXT:    stfd f29, 184(r1) # 8-byte Folded Spill
 ; BE-NEXT:    fmr f29, f1
 ; BE-NEXT:    fmr f1, f2
@@ -293,18 +266,8 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -144(r1)
-; CHECK-NEXT:    std r0, 160(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 144
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset r28, -56
-; CHECK-NEXT:    .cfi_offset r29, -48
-; CHECK-NEXT:    .cfi_offset r30, -40
-; CHECK-NEXT:    .cfi_offset f29, -24
-; CHECK-NEXT:    .cfi_offset f30, -16
-; CHECK-NEXT:    .cfi_offset f31, -8
-; CHECK-NEXT:    .cfi_offset v30, -96
-; CHECK-NEXT:    .cfi_offset v31, -80
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    std r0, 160(r1)
 ; CHECK-NEXT:    std r28, 88(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r29, 96(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, 104(r1) # 8-byte Folded Spill
@@ -312,11 +275,11 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 ; CHECK-NEXT:    fmr f29, f2
 ; CHECK-NEXT:    stfd f30, 128(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f30, f3
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    stfd f31, 136(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f31, f4
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __truncsfhf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    fmr f1, f29
@@ -369,11 +332,11 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 ; CHECK-NEXT:    lfd f29, 120(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r30, 104(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r29, 96(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
 ; CHECK-NEXT:    ld r28, 88(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    xxmrghd v3, vs0, v30
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 144
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -382,12 +345,6 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 ; FAST-LABEL: llrint_v4i64_v4f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    .cfi_def_cfa_offset 64
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset f28, -32
-; FAST-NEXT:    .cfi_offset f29, -24
-; FAST-NEXT:    .cfi_offset f30, -16
-; FAST-NEXT:    .cfi_offset f31, -8
 ; FAST-NEXT:    stfd f28, -32(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f29, -24(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
@@ -451,28 +408,12 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 }
 declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>)
 
-define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
+define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
 ; BE-LABEL: llrint_v8i64_v8f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -304(r1)
 ; BE-NEXT:    std r0, 320(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 304
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset r24, -120
-; BE-NEXT:    .cfi_offset r25, -112
-; BE-NEXT:    .cfi_offset r26, -104
-; BE-NEXT:    .cfi_offset r27, -96
-; BE-NEXT:    .cfi_offset r28, -88
-; BE-NEXT:    .cfi_offset r29, -80
-; BE-NEXT:    .cfi_offset r30, -72
-; BE-NEXT:    .cfi_offset f25, -56
-; BE-NEXT:    .cfi_offset f26, -48
-; BE-NEXT:    .cfi_offset f27, -40
-; BE-NEXT:    .cfi_offset f28, -32
-; BE-NEXT:    .cfi_offset f29, -24
-; BE-NEXT:    .cfi_offset f30, -16
-; BE-NEXT:    .cfi_offset f31, -8
 ; BE-NEXT:    stfd f25, 248(r1) # 8-byte Folded Spill
 ; BE-NEXT:    fmr f25, f1
 ; BE-NEXT:    fmr f1, f2
@@ -618,44 +559,24 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -240(r1)
-; CHECK-NEXT:    std r0, 256(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 240
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset r24, -120
-; CHECK-NEXT:    .cfi_offset r25, -112
-; CHECK-NEXT:    .cfi_offset r26, -104
-; CHECK-NEXT:    .cfi_offset r27, -96
-; CHECK-NEXT:    .cfi_offset r28, -88
-; CHECK-NEXT:    .cfi_offset r29, -80
-; CHECK-NEXT:    .cfi_offset r30, -72
-; CHECK-NEXT:    .cfi_offset f25, -56
-; CHECK-NEXT:    .cfi_offset f26, -48
-; CHECK-NEXT:    .cfi_offset f27, -40
-; CHECK-NEXT:    .cfi_offset f28, -32
-; CHECK-NEXT:    .cfi_offset f29, -24
-; CHECK-NEXT:    .cfi_offset f30, -16
-; CHECK-NEXT:    .cfi_offset f31, -8
-; CHECK-NEXT:    .cfi_offset v28, -192
-; CHECK-NEXT:    .cfi_offset v29, -176
-; CHECK-NEXT:    .cfi_offset v30, -160
-; CHECK-NEXT:    .cfi_offset v31, -144
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    std r0, 256(r1)
 ; CHECK-NEXT:    std r24, 120(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r25, 128(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r26, 136(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r27, 144(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r28, 152(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r29, 160(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, 168(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    std r30, 168(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f25, 184(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f25, f2
 ; CHECK-NEXT:    stfd f26, 192(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f26, f3
 ; CHECK-NEXT:    stfd f27, 200(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f27, f4
-; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    stfd f28, 208(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f28, f5
@@ -663,11 +584,11 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
 ; CHECK-NEXT:    fmr f29, f6
 ; CHECK-NEXT:    stfd f30, 224(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f30, f7
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    stfd f31, 232(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f31, f8
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __truncsfhf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    fmr f1, f25
@@ -770,7 +691,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
 ; CHECK-NEXT:    vmr v4, v29
 ; CHECK-NEXT:    lfd f30, 224(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f29, 216(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    lfd f28, 208(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f27, 200(r1) # 8-byte Folded Reload
@@ -778,7 +699,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
 ; CHECK-NEXT:    lfd f25, 184(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r30, 168(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r29, 160(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    ld r28, 152(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r27, 144(r1) # 8-byte Folded Reload
@@ -786,9 +707,9 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
 ; CHECK-NEXT:    ld r26, 136(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r25, 128(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r24, 120(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 240
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -797,16 +718,6 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
 ; FAST-LABEL: llrint_v8i64_v8f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    .cfi_def_cfa_offset 96
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset f24, -64
-; FAST-NEXT:    .cfi_offset f25, -56
-; FAST-NEXT:    .cfi_offset f26, -48
-; FAST-NEXT:    .cfi_offset f27, -40
-; FAST-NEXT:    .cfi_offset f28, -32
-; FAST-NEXT:    .cfi_offset f29, -24
-; FAST-NEXT:    .cfi_offset f30, -16
-; FAST-NEXT:    .cfi_offset f31, -8
 ; FAST-NEXT:    stfd f24, -64(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f25, -56(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f26, -48(r1) # 8-byte Folded Spill
@@ -924,44 +835,12 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>)
 
-define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
+define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
 ; BE-LABEL: llrint_v16i64_v16f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -496(r1)
 ; BE-NEXT:    std r0, 512(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 496
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset r16, -248
-; BE-NEXT:    .cfi_offset r17, -240
-; BE-NEXT:    .cfi_offset r18, -232
-; BE-NEXT:    .cfi_offset r19, -224
-; BE-NEXT:    .cfi_offset r20, -216
-; BE-NEXT:    .cfi_offset r21, -208
-; BE-NEXT:    .cfi_offset r22, -200
-; BE-NEXT:    .cfi_offset r23, -192
-; BE-NEXT:    .cfi_offset r24, -184
-; BE-NEXT:    .cfi_offset r25, -176
-; BE-NEXT:    .cfi_offset r26, -168
-; BE-NEXT:    .cfi_offset r27, -160
-; BE-NEXT:    .cfi_offset r28, -152
-; BE-NEXT:    .cfi_offset r29, -144
-; BE-NEXT:    .cfi_offset r30, -136
-; BE-NEXT:    .cfi_offset f17, -120
-; BE-NEXT:    .cfi_offset f18, -112
-; BE-NEXT:    .cfi_offset f19, -104
-; BE-NEXT:    .cfi_offset f20, -96
-; BE-NEXT:    .cfi_offset f21, -88
-; BE-NEXT:    .cfi_offset f22, -80
-; BE-NEXT:    .cfi_offset f23, -72
-; BE-NEXT:    .cfi_offset f24, -64
-; BE-NEXT:    .cfi_offset f25, -56
-; BE-NEXT:    .cfi_offset f26, -48
-; BE-NEXT:    .cfi_offset f27, -40
-; BE-NEXT:    .cfi_offset f28, -32
-; BE-NEXT:    .cfi_offset f29, -24
-; BE-NEXT:    .cfi_offset f30, -16
-; BE-NEXT:    .cfi_offset f31, -8
 ; BE-NEXT:    stfd f20, 400(r1) # 8-byte Folded Spill
 ; BE-NEXT:    fmr f20, f1
 ; BE-NEXT:    fmr f1, f2
@@ -1248,105 +1127,65 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -432(r1)
-; CHECK-NEXT:    std r0, 448(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 432
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset r16, -248
-; CHECK-NEXT:    .cfi_offset r17, -240
-; CHECK-NEXT:    .cfi_offset r18, -232
-; CHECK-NEXT:    .cfi_offset r19, -224
-; CHECK-NEXT:    .cfi_offset r20, -216
-; CHECK-NEXT:    .cfi_offset r21, -208
-; CHECK-NEXT:    .cfi_offset r22, -200
-; CHECK-NEXT:    .cfi_offset r23, -192
-; CHECK-NEXT:    .cfi_offset r24, -184
-; CHECK-NEXT:    .cfi_offset r25, -176
-; CHECK-NEXT:    .cfi_offset r26, -168
-; CHECK-NEXT:    .cfi_offset r27, -160
-; CHECK-NEXT:    .cfi_offset r28, -152
-; CHECK-NEXT:    .cfi_offset r29, -144
-; CHECK-NEXT:    .cfi_offset r30, -136
-; CHECK-NEXT:    .cfi_offset f17, -120
-; CHECK-NEXT:    .cfi_offset f18, -112
-; CHECK-NEXT:    .cfi_offset f19, -104
-; CHECK-NEXT:    .cfi_offset f20, -96
-; CHECK-NEXT:    .cfi_offset f21, -88
-; CHECK-NEXT:    .cfi_offset f22, -80
-; CHECK-NEXT:    .cfi_offset f23, -72
-; CHECK-NEXT:    .cfi_offset f24, -64
-; CHECK-NEXT:    .cfi_offset f25, -56
-; CHECK-NEXT:    .cfi_offset f26, -48
-; CHECK-NEXT:    .cfi_offset f27, -40
-; CHECK-NEXT:    .cfi_offset f28, -32
-; CHECK-NEXT:    .cfi_offset f29, -24
-; CHECK-NEXT:    .cfi_offset f30, -16
-; CHECK-NEXT:    .cfi_offset f31, -8
-; CHECK-NEXT:    .cfi_offset v24, -384
-; CHECK-NEXT:    .cfi_offset v25, -368
-; CHECK-NEXT:    .cfi_offset v26, -352
-; CHECK-NEXT:    .cfi_offset v27, -336
-; CHECK-NEXT:    .cfi_offset v28, -320
-; CHECK-NEXT:    .cfi_offset v29, -304
-; CHECK-NEXT:    .cfi_offset v30, -288
-; CHECK-NEXT:    .cfi_offset v31, -272
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    std r0, 448(r1)
 ; CHECK-NEXT:    std r16, 184(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r17, 192(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r18, 200(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r19, 208(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r20, 216(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r21, 224(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r22, 232(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v24, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    std r22, 232(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r23, 240(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r24, 248(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r25, 256(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r26, 264(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r27, 272(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r28, 280(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 80
+; CHECK-NEXT:    std r28, 280(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r29, 288(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, 296(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f17, 312(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f18, 320(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f19, 328(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    stfd f20, 336(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f20, f2
-; CHECK-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    stfd f21, 344(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f21, f3
 ; CHECK-NEXT:    stfd f22, 352(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f22, f4
+; CHECK-NEXT:    stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 112
 ; CHECK-NEXT:    stfd f23, 360(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f23, f5
-; CHECK-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 112
 ; CHECK-NEXT:    stfd f24, 368(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f24, f6
 ; CHECK-NEXT:    stfd f25, 376(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f25, f7
+; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 128
 ; CHECK-NEXT:    stfd f26, 384(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f26, f8
-; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 128
 ; CHECK-NEXT:    stfd f27, 392(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f27, f9
 ; CHECK-NEXT:    stfd f28, 400(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f28, f10
+; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 144
 ; CHECK-NEXT:    stfd f29, 408(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f29, f11
-; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 144
 ; CHECK-NEXT:    stfd f30, 416(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f30, f12
 ; CHECK-NEXT:    stfd f31, 424(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f31, f13
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 160
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __truncsfhf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    fmr f1, f20
@@ -1549,7 +1388,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; CHECK-NEXT:    vmr v4, v29
 ; CHECK-NEXT:    lfd f30, 416(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f29, 408(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 144
 ; CHECK-NEXT:    vmr v5, v28
 ; CHECK-NEXT:    vmr v6, v27
@@ -1557,7 +1396,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; CHECK-NEXT:    vmr v8, v25
 ; CHECK-NEXT:    lfd f28, 400(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f27, 392(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 128
 ; CHECK-NEXT:    lfd f26, 384(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f25, 376(r1) # 8-byte Folded Reload
@@ -1565,7 +1404,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; CHECK-NEXT:    lfd f24, 368(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f23, 360(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f22, 352(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 112
 ; CHECK-NEXT:    lfd f21, 344(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r30, 296(r1) # 8-byte Folded Reload
@@ -1573,7 +1412,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; CHECK-NEXT:    lfd f19, 328(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r29, 288(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r28, 280(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    lfd f18, 320(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r27, 272(r1) # 8-byte Folded Reload
@@ -1581,7 +1420,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; CHECK-NEXT:    ld r26, 264(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r25, 256(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r24, 248(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    ld r23, 240(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r22, 232(r1) # 8-byte Folded Reload
@@ -1589,13 +1428,13 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; CHECK-NEXT:    ld r20, 216(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r19, 208(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r18, 200(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    ld r17, 192(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r16, 184(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 432
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -1604,24 +1443,6 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; FAST-LABEL: llrint_v16i64_v16f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    .cfi_def_cfa_offset 160
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset f16, -128
-; FAST-NEXT:    .cfi_offset f17, -120
-; FAST-NEXT:    .cfi_offset f18, -112
-; FAST-NEXT:    .cfi_offset f19, -104
-; FAST-NEXT:    .cfi_offset f20, -96
-; FAST-NEXT:    .cfi_offset f21, -88
-; FAST-NEXT:    .cfi_offset f22, -80
-; FAST-NEXT:    .cfi_offset f23, -72
-; FAST-NEXT:    .cfi_offset f24, -64
-; FAST-NEXT:    .cfi_offset f25, -56
-; FAST-NEXT:    .cfi_offset f26, -48
-; FAST-NEXT:    .cfi_offset f27, -40
-; FAST-NEXT:    .cfi_offset f28, -32
-; FAST-NEXT:    .cfi_offset f29, -24
-; FAST-NEXT:    .cfi_offset f30, -16
-; FAST-NEXT:    .cfi_offset f31, -8
 ; FAST-NEXT:    stfd f16, -128(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f17, -120(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f18, -112(r1) # 8-byte Folded Spill
@@ -1845,50 +1666,12 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 }
 declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>)
 
-define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
+define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; BE-LABEL: llrint_v32i64_v32f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -864(r1)
 ; BE-NEXT:    std r0, 880(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 864
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset r14, -288
-; BE-NEXT:    .cfi_offset r15, -280
-; BE-NEXT:    .cfi_offset r16, -272
-; BE-NEXT:    .cfi_offset r17, -264
-; BE-NEXT:    .cfi_offset r18, -256
-; BE-NEXT:    .cfi_offset r19, -248
-; BE-NEXT:    .cfi_offset r20, -240
-; BE-NEXT:    .cfi_offset r21, -232
-; BE-NEXT:    .cfi_offset r22, -224
-; BE-NEXT:    .cfi_offset r23, -216
-; BE-NEXT:    .cfi_offset r24, -208
-; BE-NEXT:    .cfi_offset r25, -200
-; BE-NEXT:    .cfi_offset r26, -192
-; BE-NEXT:    .cfi_offset r27, -184
-; BE-NEXT:    .cfi_offset r28, -176
-; BE-NEXT:    .cfi_offset r29, -168
-; BE-NEXT:    .cfi_offset r30, -160
-; BE-NEXT:    .cfi_offset r31, -152
-; BE-NEXT:    .cfi_offset f14, -144
-; BE-NEXT:    .cfi_offset f15, -136
-; BE-NEXT:    .cfi_offset f16, -128
-; BE-NEXT:    .cfi_offset f17, -120
-; BE-NEXT:    .cfi_offset f18, -112
-; BE-NEXT:    .cfi_offset f19, -104
-; BE-NEXT:    .cfi_offset f20, -96
-; BE-NEXT:    .cfi_offset f21, -88
-; BE-NEXT:    .cfi_offset f22, -80
-; BE-NEXT:    .cfi_offset f23, -72
-; BE-NEXT:    .cfi_offset f24, -64
-; BE-NEXT:    .cfi_offset f25, -56
-; BE-NEXT:    .cfi_offset f26, -48
-; BE-NEXT:    .cfi_offset f27, -40
-; BE-NEXT:    .cfi_offset f28, -32
-; BE-NEXT:    .cfi_offset f29, -24
-; BE-NEXT:    .cfi_offset f30, -16
-; BE-NEXT:    .cfi_offset f31, -8
 ; BE-NEXT:    stfd f20, 768(r1) # 8-byte Folded Spill
 ; BE-NEXT:    fmr f20, f1
 ; BE-NEXT:    fmr f1, f2
@@ -1928,6 +1711,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; BE-NEXT:    stfd f30, 848(r1) # 8-byte Folded Spill
 ; BE-NEXT:    stfd f31, 856(r1) # 8-byte Folded Spill
 ; BE-NEXT:    fmr f31, f13
+; BE-NEXT:    mr r30, r3
 ; BE-NEXT:    fmr f29, f12
 ; BE-NEXT:    fmr f30, f11
 ; BE-NEXT:    fmr f28, f10
@@ -1938,7 +1722,6 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; BE-NEXT:    fmr f23, f5
 ; BE-NEXT:    fmr f22, f4
 ; BE-NEXT:    fmr f21, f3
-; BE-NEXT:    mr r30, r3
 ; BE-NEXT:    bl __truncsfhf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    fmr f1, f20
@@ -2441,98 +2224,48 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -688(r1)
-; CHECK-NEXT:    std r0, 704(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 688
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset r14, -288
-; CHECK-NEXT:    .cfi_offset r15, -280
-; CHECK-NEXT:    .cfi_offset r16, -272
-; CHECK-NEXT:    .cfi_offset r17, -264
-; CHECK-NEXT:    .cfi_offset r18, -256
-; CHECK-NEXT:    .cfi_offset r19, -248
-; CHECK-NEXT:    .cfi_offset r20, -240
-; CHECK-NEXT:    .cfi_offset r21, -232
-; CHECK-NEXT:    .cfi_offset r22, -224
-; CHECK-NEXT:    .cfi_offset r23, -216
-; CHECK-NEXT:    .cfi_offset r24, -208
-; CHECK-NEXT:    .cfi_offset r25, -200
-; CHECK-NEXT:    .cfi_offset r26, -192
-; CHECK-NEXT:    .cfi_offset r27, -184
-; CHECK-NEXT:    .cfi_offset r28, -176
-; CHECK-NEXT:    .cfi_offset r29, -168
-; CHECK-NEXT:    .cfi_offset r30, -160
-; CHECK-NEXT:    .cfi_offset r31, -152
-; CHECK-NEXT:    .cfi_offset f14, -144
-; CHECK-NEXT:    .cfi_offset f15, -136
-; CHECK-NEXT:    .cfi_offset f16, -128
-; CHECK-NEXT:    .cfi_offset f17, -120
-; CHECK-NEXT:    .cfi_offset f18, -112
-; CHECK-NEXT:    .cfi_offset f19, -104
-; CHECK-NEXT:    .cfi_offset f20, -96
-; CHECK-NEXT:    .cfi_offset f21, -88
-; CHECK-NEXT:    .cfi_offset f22, -80
-; CHECK-NEXT:    .cfi_offset f23, -72
-; CHECK-NEXT:    .cfi_offset f24, -64
-; CHECK-NEXT:    .cfi_offset f25, -56
-; CHECK-NEXT:    .cfi_offset f26, -48
-; CHECK-NEXT:    .cfi_offset f27, -40
-; CHECK-NEXT:    .cfi_offset f28, -32
-; CHECK-NEXT:    .cfi_offset f29, -24
-; CHECK-NEXT:    .cfi_offset f30, -16
-; CHECK-NEXT:    .cfi_offset f31, -8
-; CHECK-NEXT:    .cfi_offset v20, -480
-; CHECK-NEXT:    .cfi_offset v21, -464
-; CHECK-NEXT:    .cfi_offset v22, -448
-; CHECK-NEXT:    .cfi_offset v23, -432
-; CHECK-NEXT:    .cfi_offset v24, -416
-; CHECK-NEXT:    .cfi_offset v25, -400
-; CHECK-NEXT:    .cfi_offset v26, -384
-; CHECK-NEXT:    .cfi_offset v27, -368
-; CHECK-NEXT:    .cfi_offset v28, -352
-; CHECK-NEXT:    .cfi_offset v29, -336
-; CHECK-NEXT:    .cfi_offset v30, -320
-; CHECK-NEXT:    .cfi_offset v31, -304
 ; CHECK-NEXT:    li r4, 208
+; CHECK-NEXT:    std r0, 704(r1)
 ; CHECK-NEXT:    std r14, 400(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r15, 408(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r16, 416(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r17, 424(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r18, 432(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r19, 440(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r20, 448(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stvx v20, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 224
+; CHECK-NEXT:    std r20, 448(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r21, 456(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r22, 464(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r23, 472(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r24, 480(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r25, 488(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r26, 496(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stvx v21, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 240
+; CHECK-NEXT:    std r26, 496(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r27, 504(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r28, 512(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r29, 520(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, 528(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    std r31, 536(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stvx v22, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 256
+; CHECK-NEXT:    std r31, 536(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f14, 544(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f15, 552(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f16, 560(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f17, 568(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f18, 576(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f19, 584(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stvx v23, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 272
+; CHECK-NEXT:    stfd f19, 584(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f20, 592(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f20, f2
 ; CHECK-NEXT:    stfd f21, 600(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f21, f3
 ; CHECK-NEXT:    stfd f22, 608(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f22, f4
-; CHECK-NEXT:    stvx v24, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 288
 ; CHECK-NEXT:    stfd f23, 616(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f23, f5
@@ -2540,7 +2273,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    fmr f24, f6
 ; CHECK-NEXT:    stfd f25, 632(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f25, f7
-; CHECK-NEXT:    stvx v25, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 304
 ; CHECK-NEXT:    stfd f26, 640(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f26, f8
@@ -2548,7 +2281,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    fmr f27, f9
 ; CHECK-NEXT:    stfd f28, 656(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f28, f10
-; CHECK-NEXT:    stvx v26, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 320
 ; CHECK-NEXT:    stfd f29, 664(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f29, f11
@@ -2556,15 +2289,15 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    fmr f30, f12
 ; CHECK-NEXT:    stfd f31, 680(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f31, f13
-; CHECK-NEXT:    stvx v27, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 336
-; CHECK-NEXT:    stvx v28, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 352
-; CHECK-NEXT:    stvx v29, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 368
-; CHECK-NEXT:    stvx v30, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 384
-; CHECK-NEXT:    stvx v31, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __truncsfhf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    fmr f1, f20
@@ -3043,7 +2776,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    li r3, 384
 ; CHECK-NEXT:    xxswapd vs4, vs4
 ; CHECK-NEXT:    stxvd2x vs4, 0, r30
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 368
 ; CHECK-NEXT:    lfd f31, 680(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f30, 672(r1) # 8-byte Folded Reload
@@ -3061,7 +2794,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    lfd f18, 576(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f17, 568(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f16, 560(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 352
 ; CHECK-NEXT:    lfd f15, 552(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f14, 544(r1) # 8-byte Folded Reload
@@ -3069,7 +2802,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    ld r30, 528(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r29, 520(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r28, 512(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 336
 ; CHECK-NEXT:    ld r27, 504(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r26, 496(r1) # 8-byte Folded Reload
@@ -3077,7 +2810,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    ld r24, 480(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r23, 472(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r22, 464(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 320
 ; CHECK-NEXT:    ld r21, 456(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r20, 448(r1) # 8-byte Folded Reload
@@ -3085,23 +2818,23 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    ld r18, 432(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r17, 424(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r16, 416(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 304
 ; CHECK-NEXT:    ld r15, 408(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r14, 400(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 288
-; CHECK-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 272
-; CHECK-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 256
-; CHECK-NEXT:    lvx v23, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v23, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 240
-; CHECK-NEXT:    lvx v22, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v22, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 224
-; CHECK-NEXT:    lvx v21, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v21, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 208
-; CHECK-NEXT:    lvx v20, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v20, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 688
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -3111,95 +2844,62 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -480(r1)
-; FAST-NEXT:    std r0, 496(r1)
-; FAST-NEXT:    .cfi_def_cfa_offset 480
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset r30, -160
-; FAST-NEXT:    .cfi_offset f14, -144
-; FAST-NEXT:    .cfi_offset f15, -136
-; FAST-NEXT:    .cfi_offset f16, -128
-; FAST-NEXT:    .cfi_offset f17, -120
-; FAST-NEXT:    .cfi_offset f18, -112
-; FAST-NEXT:    .cfi_offset f19, -104
-; FAST-NEXT:    .cfi_offset f20, -96
-; FAST-NEXT:    .cfi_offset f21, -88
-; FAST-NEXT:    .cfi_offset f22, -80
-; FAST-NEXT:    .cfi_offset f23, -72
-; FAST-NEXT:    .cfi_offset f24, -64
-; FAST-NEXT:    .cfi_offset f25, -56
-; FAST-NEXT:    .cfi_offset f26, -48
-; FAST-NEXT:    .cfi_offset f27, -40
-; FAST-NEXT:    .cfi_offset f28, -32
-; FAST-NEXT:    .cfi_offset f29, -24
-; FAST-NEXT:    .cfi_offset f30, -16
-; FAST-NEXT:    .cfi_offset f31, -8
-; FAST-NEXT:    .cfi_offset v20, -352
-; FAST-NEXT:    .cfi_offset v21, -336
-; FAST-NEXT:    .cfi_offset v22, -320
-; FAST-NEXT:    .cfi_offset v23, -304
-; FAST-NEXT:    .cfi_offset v24, -288
-; FAST-NEXT:    .cfi_offset v25, -272
-; FAST-NEXT:    .cfi_offset v26, -256
-; FAST-NEXT:    .cfi_offset v27, -240
-; FAST-NEXT:    .cfi_offset v28, -224
-; FAST-NEXT:    .cfi_offset v29, -208
-; FAST-NEXT:    .cfi_offset v30, -192
-; FAST-NEXT:    .cfi_offset v31, -176
 ; FAST-NEXT:    li r4, 128
+; FAST-NEXT:    std r0, 496(r1)
 ; FAST-NEXT:    std r30, 320(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r30, r3
 ; FAST-NEXT:    stfd f14, 336(r1) # 8-byte Folded Spill
-; FAST-NEXT:    fmr f14, f5
 ; FAST-NEXT:    stfd f15, 344(r1) # 8-byte Folded Spill
+; FAST-NEXT:    fmr f14, f5
 ; FAST-NEXT:    stfd f16, 352(r1) # 8-byte Folded Spill
-; FAST-NEXT:    fmr f16, f4
-; FAST-NEXT:    mr r30, r3
-; FAST-NEXT:    stvx v20, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 144
+; FAST-NEXT:    fmr f16, f4
 ; FAST-NEXT:    stfd f17, 360(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f18, 368(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f19, 376(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f20, 384(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f21, 392(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f22, 400(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stvx v21, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 160
+; FAST-NEXT:    stfd f22, 400(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f23, 408(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f24, 416(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f25, 424(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f26, 432(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f27, 440(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f28, 448(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stvx v22, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 176
 ; FAST-NEXT:    xxlor v22, f3, f3
+; FAST-NEXT:    stfd f28, 448(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f29, 456(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    fmr f29, f9
 ; FAST-NEXT:    stfd f30, 464(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f31, 472(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stvx v23, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 192
 ; FAST-NEXT:    xxlor v23, f2, f2
-; FAST-NEXT:    stvx v24, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 208
-; FAST-NEXT:    stvx v25, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 224
 ; FAST-NEXT:    xxlor v25, f13, f13
-; FAST-NEXT:    stvx v26, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 240
 ; FAST-NEXT:    xxlor v26, f12, f12
-; FAST-NEXT:    stvx v27, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 256
 ; FAST-NEXT:    xxlor v27, f11, f11
-; FAST-NEXT:    stvx v28, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 272
 ; FAST-NEXT:    xxlor v28, f10, f10
-; FAST-NEXT:    stvx v29, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 288
 ; FAST-NEXT:    xxlor v29, f8, f8
-; FAST-NEXT:    stvx v30, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 304
 ; FAST-NEXT:    xxlor v30, f7, f7
-; FAST-NEXT:    stvx v31, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 44
 ; FAST-NEXT:    xxlor v31, f6, f6
 ; FAST-NEXT:    stxsspx f1, r1, r4 # 4-byte Folded Spill
@@ -3628,30 +3328,30 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 ; FAST-NEXT:    lfd f16, 352(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    lfd f15, 344(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    lfd f14, 336(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 288
 ; FAST-NEXT:    ld r30, 320(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 272
-; FAST-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 256
-; FAST-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 240
-; FAST-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 224
-; FAST-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 208
-; FAST-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 192
-; FAST-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 176
-; FAST-NEXT:    lvx v23, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v23, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 160
-; FAST-NEXT:    lvx v22, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v22, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 144
-; FAST-NEXT:    lvx v21, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v21, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 128
-; FAST-NEXT:    lvx v20, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v20, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    addi r1, r1, 480
 ; FAST-NEXT:    ld r0, 16(r1)
 ; FAST-NEXT:    mtlr r0
@@ -3661,14 +3361,12 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
 }
 declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
 
-define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) nounwind {
 ; BE-LABEL: llrint_v1i64_v1f32:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -112(r1)
 ; BE-NEXT:    std r0, 128(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 112
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    nop
 ; BE-NEXT:    addi r1, r1, 112
@@ -3681,8 +3379,6 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -32(r1)
 ; CHECK-NEXT:    std r0, 48(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addi r1, r1, 32
@@ -3700,15 +3396,13 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
 }
 declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
 
-define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) nounwind {
 ; BE-LABEL: llrint_v2i64_v2f32:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -144(r1)
-; BE-NEXT:    std r0, 160(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 144
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    addi r3, r1, 112
+; BE-NEXT:    std r0, 160(r1)
 ; BE-NEXT:    stxvw4x v2, 0, r3
 ; BE-NEXT:    lfs f1, 116(r1)
 ; BE-NEXT:    bl llrintf
@@ -3729,14 +3423,11 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -64(r1)
-; CHECK-NEXT:    std r0, 80(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    std r0, 80(r1)
 ; CHECK-NEXT:    xscvspdpn f1, vs0
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v2
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
@@ -3748,7 +3439,7 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 48
 ; CHECK-NEXT:    xxmrghd v2, vs0, v31
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 64
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -3773,15 +3464,13 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
 }
 declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
 
-define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) nounwind {
 ; BE-LABEL: llrint_v4i64_v4f32:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -160(r1)
-; BE-NEXT:    std r0, 176(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 160
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    addi r3, r1, 112
+; BE-NEXT:    std r0, 176(r1)
 ; BE-NEXT:    stxvw4x v2, 0, r3
 ; BE-NEXT:    lfs f1, 116(r1)
 ; BE-NEXT:    bl llrintf
@@ -3812,17 +3501,13 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -80(r1)
-; CHECK-NEXT:    std r0, 96(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    std r0, 96(r1)
 ; CHECK-NEXT:    xscvspdpn f1, vs0
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v2
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
@@ -3845,9 +3530,9 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    vmr v2, v30
 ; CHECK-NEXT:    xxmrghd v3, v31, vs0
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 80
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -3883,15 +3568,13 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
 }
 declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
 
-define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind {
 ; BE-LABEL: llrint_v8i64_v8f32:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -208(r1)
-; BE-NEXT:    std r0, 224(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 208
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    addi r3, r1, 112
+; BE-NEXT:    std r0, 224(r1)
 ; BE-NEXT:    stxvw4x v2, 0, r3
 ; BE-NEXT:    addi r3, r1, 128
 ; BE-NEXT:    stxvw4x v3, 0, r3
@@ -3944,24 +3627,18 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -112(r1)
-; CHECK-NEXT:    std r0, 128(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v28, -64
-; CHECK-NEXT:    .cfi_offset v29, -48
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
 ; CHECK-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    std r0, 128(r1)
+; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    xscvspdpn f1, vs0
-; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    vmr v30, v2
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v3
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
@@ -4007,13 +3684,13 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
 ; CHECK-NEXT:    vmr v2, v29
 ; CHECK-NEXT:    vmr v4, v28
 ; CHECK-NEXT:    xxmrghd v5, v31, vs0
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 112
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -4071,15 +3748,13 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
 
-define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) nounwind {
 ; BE-LABEL: llrint_v16i64_v16f32:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -304(r1)
-; BE-NEXT:    std r0, 320(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 304
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    addi r3, r1, 112
+; BE-NEXT:    std r0, 320(r1)
 ; BE-NEXT:    stxvw4x v2, 0, r3
 ; BE-NEXT:    addi r3, r1, 128
 ; BE-NEXT:    stxvw4x v3, 0, r3
@@ -4176,38 +3851,28 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -176(r1)
-; CHECK-NEXT:    std r0, 192(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 176
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v24, -128
-; CHECK-NEXT:    .cfi_offset v25, -112
-; CHECK-NEXT:    .cfi_offset v26, -96
-; CHECK-NEXT:    .cfi_offset v27, -80
-; CHECK-NEXT:    .cfi_offset v28, -64
-; CHECK-NEXT:    .cfi_offset v29, -48
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    std r0, 192(r1)
 ; CHECK-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v24, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    xscvspdpn f1, vs0
-; CHECK-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v26, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    vmr v26, v3
-; CHECK-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v27, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 112
-; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 128
 ; CHECK-NEXT:    vmr v28, v4
-; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 144
 ; CHECK-NEXT:    vmr v29, v2
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 160
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v5
 ; CHECK-NEXT:    bl llrintf
 ; CHECK-NEXT:    nop
@@ -4299,21 +3964,21 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
 ; CHECK-NEXT:    vmr v6, v25
 ; CHECK-NEXT:    vmr v8, v24
 ; CHECK-NEXT:    xxmrghd v9, v31, vs0
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 144
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 128
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 112
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 96
-; CHECK-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 176
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -4415,14 +4080,12 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
 }
 declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
 
-define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
+define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind {
 ; BE-LABEL: llrint_v1i64_v1f64:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -112(r1)
 ; BE-NEXT:    std r0, 128(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 112
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    addi r1, r1, 112
@@ -4435,8 +4098,6 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -32(r1)
 ; CHECK-NEXT:    std r0, 48(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl llrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addi r1, r1, 32
@@ -4454,16 +4115,13 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
 }
 declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
 
-define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
+define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) nounwind {
 ; BE-LABEL: llrint_v2i64_v2f64:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -160(r1)
-; BE-NEXT:    std r0, 176(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 160
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset v31, -16
 ; BE-NEXT:    li r3, 144
+; BE-NEXT:    std r0, 176(r1)
 ; BE-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    vmr v31, v2
 ; BE-NEXT:    xxlor f1, v31, v31
@@ -4487,12 +4145,9 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -64(r1)
-; CHECK-NEXT:    std r0, 80(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    std r0, 80(r1)
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v2
 ; CHECK-NEXT:    xxlor f1, v31, v31
 ; CHECK-NEXT:    bl llrint
@@ -4504,7 +4159,7 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 48
 ; CHECK-NEXT:    xxmrghd v2, v31, vs0
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 64
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -4527,17 +4182,13 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
 }
 declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
 
-define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
+define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind {
 ; BE-LABEL: llrint_v4i64_v4f64:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -192(r1)
-; BE-NEXT:    std r0, 208(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 192
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset v30, -32
-; BE-NEXT:    .cfi_offset v31, -16
 ; BE-NEXT:    li r3, 160
+; BE-NEXT:    std r0, 208(r1)
 ; BE-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    vmr v30, v2
 ; BE-NEXT:    li r3, 176
@@ -4576,17 +4227,13 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -80(r1)
-; CHECK-NEXT:    std r0, 96(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    std r0, 96(r1)
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v30, v2
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    xxlor f1, v30, v30
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v3
 ; CHECK-NEXT:    bl llrint
 ; CHECK-NEXT:    nop
@@ -4607,9 +4254,9 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    vmr v2, v30
 ; CHECK-NEXT:    xxmrghd v3, v31, vs0
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 80
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -4641,25 +4288,19 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
 }
 declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
 
-define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
+define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind {
 ; BE-LABEL: llrint_v8i64_v8f64:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -256(r1)
-; BE-NEXT:    std r0, 272(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 256
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset v28, -64
-; BE-NEXT:    .cfi_offset v29, -48
-; BE-NEXT:    .cfi_offset v30, -32
-; BE-NEXT:    .cfi_offset v31, -16
 ; BE-NEXT:    li r3, 192
+; BE-NEXT:    std r0, 272(r1)
 ; BE-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    li r3, 208
 ; BE-NEXT:    vmr v28, v2
-; BE-NEXT:    xxlor f1, v28, v28
 ; BE-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    li r3, 224
+; BE-NEXT:    xxlor f1, v28, v28
 ; BE-NEXT:    vmr v29, v3
 ; BE-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    li r3, 240
@@ -4722,25 +4363,19 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -112(r1)
-; CHECK-NEXT:    std r0, 128(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v28, -64
-; CHECK-NEXT:    .cfi_offset v29, -48
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    std r0, 128(r1)
+; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    vmr v28, v2
-; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    xxlor f1, v28, v28
 ; CHECK-NEXT:    vmr v29, v3
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    vmr v30, v4
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v5
 ; CHECK-NEXT:    bl llrint
 ; CHECK-NEXT:    nop
@@ -4781,13 +4416,13 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
 ; CHECK-NEXT:    vmr v3, v29
 ; CHECK-NEXT:    vmr v2, v28
 ; CHECK-NEXT:    xxmrghd v5, v31, vs0
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 112
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -4837,14 +4472,12 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
 
-define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
+define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) nounwind {
 ; BE-LABEL: llrint_v1i64_v1f128:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -112(r1)
 ; BE-NEXT:    std r0, 128(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 112
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl llrintf128
 ; BE-NEXT:    nop
 ; BE-NEXT:    addi r1, r1, 112
@@ -4857,8 +4490,6 @@ define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -32(r1)
 ; CHECK-NEXT:    std r0, 48(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl llrintf128
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addi r1, r1, 32
@@ -4871,8 +4502,6 @@ define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -32(r1)
 ; FAST-NEXT:    std r0, 48(r1)
-; FAST-NEXT:    .cfi_def_cfa_offset 32
-; FAST-NEXT:    .cfi_offset lr, 16
 ; FAST-NEXT:    bl llrintf128
 ; FAST-NEXT:    nop
 ; FAST-NEXT:    addi r1, r1, 32
@@ -4884,16 +4513,13 @@ define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
 }
 declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>)
 
-define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
+define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) nounwind {
 ; BE-LABEL: llrint_v2i64_v2f128:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -160(r1)
-; BE-NEXT:    std r0, 176(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 160
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset v31, -16
 ; BE-NEXT:    li r3, 144
+; BE-NEXT:    std r0, 176(r1)
 ; BE-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    vmr v31, v2
 ; BE-NEXT:    vmr v2, v3
@@ -4917,15 +4543,11 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -80(r1)
-; CHECK-NEXT:    std r0, 96(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    std r0, 96(r1)
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v3
 ; CHECK-NEXT:    bl llrintf128
 ; CHECK-NEXT:    nop
@@ -4935,10 +4557,10 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
 ; CHECK-NEXT:    xxmrghd v2, vs0, v30
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 80
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -4948,15 +4570,11 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -80(r1)
-; FAST-NEXT:    std r0, 96(r1)
-; FAST-NEXT:    .cfi_def_cfa_offset 80
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset v30, -32
-; FAST-NEXT:    .cfi_offset v31, -16
 ; FAST-NEXT:    li r3, 48
-; FAST-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    std r0, 96(r1)
+; FAST-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    vmr v31, v3
 ; FAST-NEXT:    bl llrintf128
 ; FAST-NEXT:    nop
@@ -4966,10 +4584,10 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
 ; FAST-NEXT:    nop
 ; FAST-NEXT:    mtfprd f0, r3
 ; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 48
 ; FAST-NEXT:    xxmrghd v2, vs0, v30
-; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    addi r1, r1, 80
 ; FAST-NEXT:    ld r0, 16(r1)
 ; FAST-NEXT:    mtlr r0
@@ -4979,18 +4597,13 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
 }
 declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>)
 
-define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
+define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) nounwind {
 ; BE-LABEL: llrint_v4i64_v4f128:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -208(r1)
-; BE-NEXT:    std r0, 224(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 208
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset v29, -48
-; BE-NEXT:    .cfi_offset v30, -32
-; BE-NEXT:    .cfi_offset v31, -16
 ; BE-NEXT:    li r3, 160
+; BE-NEXT:    std r0, 224(r1)
 ; BE-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    li r3, 176
 ; BE-NEXT:    vmr v29, v2
@@ -5034,23 +4647,17 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -112(r1)
-; CHECK-NEXT:    std r0, 128(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v28, -64
-; CHECK-NEXT:    .cfi_offset v29, -48
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    std r0, 128(r1)
+; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    vmr v29, v3
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    vmr v30, v4
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v5
 ; CHECK-NEXT:    bl llrintf128
 ; CHECK-NEXT:    nop
@@ -5070,14 +4677,14 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    vmr v2, v29
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    xxmrghd v3, vs0, v30
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 112
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -5087,23 +4694,17 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -112(r1)
-; FAST-NEXT:    std r0, 128(r1)
-; FAST-NEXT:    .cfi_def_cfa_offset 112
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset v28, -64
-; FAST-NEXT:    .cfi_offset v29, -48
-; FAST-NEXT:    .cfi_offset v30, -32
-; FAST-NEXT:    .cfi_offset v31, -16
 ; FAST-NEXT:    li r3, 48
-; FAST-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    std r0, 128(r1)
+; FAST-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 80
 ; FAST-NEXT:    vmr v29, v3
-; FAST-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 96
 ; FAST-NEXT:    vmr v30, v4
-; FAST-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    vmr v31, v5
 ; FAST-NEXT:    bl llrintf128
 ; FAST-NEXT:    nop
@@ -5123,14 +4724,14 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
 ; FAST-NEXT:    mtfprd f0, r3
 ; FAST-NEXT:    li r3, 96
 ; FAST-NEXT:    vmr v2, v29
-; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 80
 ; FAST-NEXT:    xxmrghd v3, vs0, v30
-; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 48
-; FAST-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    addi r1, r1, 112
 ; FAST-NEXT:    ld r0, 16(r1)
 ; FAST-NEXT:    mtlr r0
@@ -5140,22 +4741,13 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
 }
 declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>)
 
-define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
+define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) nounwind {
 ; BE-LABEL: llrint_v8i64_v8f128:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -304(r1)
-; BE-NEXT:    std r0, 320(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 304
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset v25, -112
-; BE-NEXT:    .cfi_offset v26, -96
-; BE-NEXT:    .cfi_offset v27, -80
-; BE-NEXT:    .cfi_offset v28, -64
-; BE-NEXT:    .cfi_offset v29, -48
-; BE-NEXT:    .cfi_offset v30, -32
-; BE-NEXT:    .cfi_offset v31, -16
 ; BE-NEXT:    li r3, 192
+; BE-NEXT:    std r0, 320(r1)
 ; BE-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    li r3, 208
 ; BE-NEXT:    vmr v25, v2
@@ -5239,39 +4831,29 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -176(r1)
-; CHECK-NEXT:    std r0, 192(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 176
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v24, -128
-; CHECK-NEXT:    .cfi_offset v25, -112
-; CHECK-NEXT:    .cfi_offset v26, -96
-; CHECK-NEXT:    .cfi_offset v27, -80
-; CHECK-NEXT:    .cfi_offset v28, -64
-; CHECK-NEXT:    .cfi_offset v29, -48
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    std r0, 192(r1)
+; CHECK-NEXT:    stxvd2x v24, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    vmr v25, v3
-; CHECK-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v26, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    vmr v26, v4
-; CHECK-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v27, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 112
 ; CHECK-NEXT:    vmr v27, v5
-; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 128
 ; CHECK-NEXT:    vmr v28, v6
-; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 144
 ; CHECK-NEXT:    vmr v29, v7
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 160
 ; CHECK-NEXT:    vmr v30, v8
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v9
 ; CHECK-NEXT:    bl llrintf128
 ; CHECK-NEXT:    nop
@@ -5309,24 +4891,24 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 160
 ; CHECK-NEXT:    vmr v4, v29
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 144
 ; CHECK-NEXT:    vmr v3, v27
 ; CHECK-NEXT:    vmr v2, v25
 ; CHECK-NEXT:    xxmrghd v5, vs0, v30
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 128
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 112
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 96
-; CHECK-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 176
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -5336,39 +4918,29 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -176(r1)
-; FAST-NEXT:    std r0, 192(r1)
-; FAST-NEXT:    .cfi_def_cfa_offset 176
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset v24, -128
-; FAST-NEXT:    .cfi_offset v25, -112
-; FAST-NEXT:    .cfi_offset v26, -96
-; FAST-NEXT:    .cfi_offset v27, -80
-; FAST-NEXT:    .cfi_offset v28, -64
-; FAST-NEXT:    .cfi_offset v29, -48
-; FAST-NEXT:    .cfi_offset v30, -32
-; FAST-NEXT:    .cfi_offset v31, -16
 ; FAST-NEXT:    li r3, 48
-; FAST-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    std r0, 192(r1)
+; FAST-NEXT:    stxvd2x v24, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 80
 ; FAST-NEXT:    vmr v25, v3
-; FAST-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v26, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 96
 ; FAST-NEXT:    vmr v26, v4
-; FAST-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v27, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 112
 ; FAST-NEXT:    vmr v27, v5
-; FAST-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 128
 ; FAST-NEXT:    vmr v28, v6
-; FAST-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 144
 ; FAST-NEXT:    vmr v29, v7
-; FAST-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 160
 ; FAST-NEXT:    vmr v30, v8
-; FAST-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    vmr v31, v9
 ; FAST-NEXT:    bl llrintf128
 ; FAST-NEXT:    nop
@@ -5406,24 +4978,24 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
 ; FAST-NEXT:    mtfprd f0, r3
 ; FAST-NEXT:    li r3, 160
 ; FAST-NEXT:    vmr v4, v29
-; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 144
 ; FAST-NEXT:    vmr v3, v27
 ; FAST-NEXT:    vmr v2, v25
 ; FAST-NEXT:    xxmrghd v5, vs0, v30
-; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 128
-; FAST-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 112
-; FAST-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 96
-; FAST-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 80
-; FAST-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 48
-; FAST-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    addi r1, r1, 176
 ; FAST-NEXT:    ld r0, 16(r1)
 ; FAST-NEXT:    mtlr r0
diff --git a/llvm/test/CodeGen/PowerPC/vector-lrint.ll b/llvm/test/CodeGen/PowerPC/vector-lrint.ll
index c64c2e15179cb..6c824be017e81 100644
--- a/llvm/test/CodeGen/PowerPC/vector-lrint.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-lrint.ll
@@ -28,14 +28,12 @@
 ; RUN:   -verify-machineinstrs --enable-unsafe-fp-math | \
 ; RUN:   FileCheck %s --check-prefixes=FAST
 
-define <1 x i64> @lrint_v1f16(<1 x half> %x) {
+define <1 x i64> @lrint_v1f16(<1 x half> %x) nounwind {
 ; BE-LABEL: lrint_v1f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -112(r1)
 ; BE-NEXT:    std r0, 128(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 112
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl __truncsfhf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    clrldi r3, r3, 48
@@ -53,8 +51,6 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) {
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -32(r1)
 ; CHECK-NEXT:    std r0, 48(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl __truncsfhf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    clrldi r3, r3, 48
@@ -72,8 +68,6 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) {
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -32(r1)
 ; FAST-NEXT:    std r0, 48(r1)
-; FAST-NEXT:    .cfi_def_cfa_offset 32
-; FAST-NEXT:    .cfi_offset lr, 16
 ; FAST-NEXT:    bl __truncsfhf2
 ; FAST-NEXT:    nop
 ; FAST-NEXT:    clrldi r3, r3, 48
@@ -90,16 +84,12 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) {
 }
 declare <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half>)
 
-define <2 x i64> @lrint_v2f16(<2 x half> %x) {
+define <2 x i64> @lrint_v2f16(<2 x half> %x) nounwind {
 ; BE-LABEL: lrint_v2f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -160(r1)
 ; BE-NEXT:    std r0, 176(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 160
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset r30, -24
-; BE-NEXT:    .cfi_offset f31, -8
 ; BE-NEXT:    stfd f31, 152(r1) # 8-byte Folded Spill
 ; BE-NEXT:    fmr f31, f1
 ; BE-NEXT:    fmr f1, f2
@@ -137,17 +127,12 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -96(r1)
-; CHECK-NEXT:    std r0, 112(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset r30, -24
-; CHECK-NEXT:    .cfi_offset f31, -8
-; CHECK-NEXT:    .cfi_offset v31, -48
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    std r0, 112(r1)
 ; CHECK-NEXT:    std r30, 72(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f31, 88(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f31, f2
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __truncsfhf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    fmr f1, f31
@@ -172,7 +157,7 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) {
 ; CHECK-NEXT:    lfd f31, 88(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r30, 72(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    xxmrghd v2, vs0, v31
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 96
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -181,10 +166,6 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) {
 ; FAST-LABEL: lrint_v2f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    .cfi_def_cfa_offset 48
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset f30, -16
-; FAST-NEXT:    .cfi_offset f31, -8
 ; FAST-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f31, -8(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stdu r1, -48(r1)
@@ -221,20 +202,12 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) {
 }
 declare <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half>)
 
-define <4 x i64> @lrint_v4f16(<4 x half> %x) {
+define <4 x i64> @lrint_v4f16(<4 x half> %x) nounwind {
 ; BE-LABEL: lrint_v4f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -208(r1)
 ; BE-NEXT:    std r0, 224(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 208
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset r28, -56
-; BE-NEXT:    .cfi_offset r29, -48
-; BE-NEXT:    .cfi_offset r30, -40
-; BE-NEXT:    .cfi_offset f29, -24
-; BE-NEXT:    .cfi_offset f30, -16
-; BE-NEXT:    .cfi_offset f31, -8
 ; BE-NEXT:    stfd f29, 184(r1) # 8-byte Folded Spill
 ; BE-NEXT:    fmr f29, f1
 ; BE-NEXT:    fmr f1, f2
@@ -308,18 +281,8 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -144(r1)
-; CHECK-NEXT:    std r0, 160(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 144
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset r28, -56
-; CHECK-NEXT:    .cfi_offset r29, -48
-; CHECK-NEXT:    .cfi_offset r30, -40
-; CHECK-NEXT:    .cfi_offset f29, -24
-; CHECK-NEXT:    .cfi_offset f30, -16
-; CHECK-NEXT:    .cfi_offset f31, -8
-; CHECK-NEXT:    .cfi_offset v30, -96
-; CHECK-NEXT:    .cfi_offset v31, -80
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    std r0, 160(r1)
 ; CHECK-NEXT:    std r28, 88(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r29, 96(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, 104(r1) # 8-byte Folded Spill
@@ -327,11 +290,11 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) {
 ; CHECK-NEXT:    fmr f29, f2
 ; CHECK-NEXT:    stfd f30, 128(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f30, f3
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    stfd f31, 136(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f31, f4
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __truncsfhf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    fmr f1, f29
@@ -384,11 +347,11 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) {
 ; CHECK-NEXT:    lfd f29, 120(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r30, 104(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r29, 96(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
 ; CHECK-NEXT:    ld r28, 88(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    xxmrghd v3, vs0, v30
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 144
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -397,12 +360,6 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) {
 ; FAST-LABEL: lrint_v4f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    .cfi_def_cfa_offset 64
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset f28, -32
-; FAST-NEXT:    .cfi_offset f29, -24
-; FAST-NEXT:    .cfi_offset f30, -16
-; FAST-NEXT:    .cfi_offset f31, -8
 ; FAST-NEXT:    stfd f28, -32(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f29, -24(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f30, -16(r1) # 8-byte Folded Spill
@@ -466,28 +423,12 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) {
 }
 declare <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half>)
 
-define <8 x i64> @lrint_v8f16(<8 x half> %x) {
+define <8 x i64> @lrint_v8f16(<8 x half> %x) nounwind {
 ; BE-LABEL: lrint_v8f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -304(r1)
 ; BE-NEXT:    std r0, 320(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 304
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset r24, -120
-; BE-NEXT:    .cfi_offset r25, -112
-; BE-NEXT:    .cfi_offset r26, -104
-; BE-NEXT:    .cfi_offset r27, -96
-; BE-NEXT:    .cfi_offset r28, -88
-; BE-NEXT:    .cfi_offset r29, -80
-; BE-NEXT:    .cfi_offset r30, -72
-; BE-NEXT:    .cfi_offset f25, -56
-; BE-NEXT:    .cfi_offset f26, -48
-; BE-NEXT:    .cfi_offset f27, -40
-; BE-NEXT:    .cfi_offset f28, -32
-; BE-NEXT:    .cfi_offset f29, -24
-; BE-NEXT:    .cfi_offset f30, -16
-; BE-NEXT:    .cfi_offset f31, -8
 ; BE-NEXT:    stfd f25, 248(r1) # 8-byte Folded Spill
 ; BE-NEXT:    fmr f25, f1
 ; BE-NEXT:    fmr f1, f2
@@ -633,44 +574,24 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -240(r1)
-; CHECK-NEXT:    std r0, 256(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 240
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset r24, -120
-; CHECK-NEXT:    .cfi_offset r25, -112
-; CHECK-NEXT:    .cfi_offset r26, -104
-; CHECK-NEXT:    .cfi_offset r27, -96
-; CHECK-NEXT:    .cfi_offset r28, -88
-; CHECK-NEXT:    .cfi_offset r29, -80
-; CHECK-NEXT:    .cfi_offset r30, -72
-; CHECK-NEXT:    .cfi_offset f25, -56
-; CHECK-NEXT:    .cfi_offset f26, -48
-; CHECK-NEXT:    .cfi_offset f27, -40
-; CHECK-NEXT:    .cfi_offset f28, -32
-; CHECK-NEXT:    .cfi_offset f29, -24
-; CHECK-NEXT:    .cfi_offset f30, -16
-; CHECK-NEXT:    .cfi_offset f31, -8
-; CHECK-NEXT:    .cfi_offset v28, -192
-; CHECK-NEXT:    .cfi_offset v29, -176
-; CHECK-NEXT:    .cfi_offset v30, -160
-; CHECK-NEXT:    .cfi_offset v31, -144
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    std r0, 256(r1)
 ; CHECK-NEXT:    std r24, 120(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r25, 128(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r26, 136(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r27, 144(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r28, 152(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r29, 160(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r30, 168(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    std r30, 168(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f25, 184(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f25, f2
 ; CHECK-NEXT:    stfd f26, 192(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f26, f3
 ; CHECK-NEXT:    stfd f27, 200(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f27, f4
-; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    stfd f28, 208(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f28, f5
@@ -678,11 +599,11 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) {
 ; CHECK-NEXT:    fmr f29, f6
 ; CHECK-NEXT:    stfd f30, 224(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f30, f7
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    stfd f31, 232(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f31, f8
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __truncsfhf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    fmr f1, f25
@@ -785,7 +706,7 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) {
 ; CHECK-NEXT:    vmr v4, v29
 ; CHECK-NEXT:    lfd f30, 224(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f29, 216(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    lfd f28, 208(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f27, 200(r1) # 8-byte Folded Reload
@@ -793,7 +714,7 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) {
 ; CHECK-NEXT:    lfd f25, 184(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r30, 168(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r29, 160(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    ld r28, 152(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r27, 144(r1) # 8-byte Folded Reload
@@ -801,9 +722,9 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) {
 ; CHECK-NEXT:    ld r26, 136(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r25, 128(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r24, 120(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 240
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -812,16 +733,6 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) {
 ; FAST-LABEL: lrint_v8f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    .cfi_def_cfa_offset 96
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset f24, -64
-; FAST-NEXT:    .cfi_offset f25, -56
-; FAST-NEXT:    .cfi_offset f26, -48
-; FAST-NEXT:    .cfi_offset f27, -40
-; FAST-NEXT:    .cfi_offset f28, -32
-; FAST-NEXT:    .cfi_offset f29, -24
-; FAST-NEXT:    .cfi_offset f30, -16
-; FAST-NEXT:    .cfi_offset f31, -8
 ; FAST-NEXT:    stfd f24, -64(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f25, -56(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f26, -48(r1) # 8-byte Folded Spill
@@ -939,44 +850,12 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) {
 }
 declare <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half>)
 
-define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
+define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) nounwind {
 ; BE-LABEL: lrint_v16i64_v16f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -496(r1)
 ; BE-NEXT:    std r0, 512(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 496
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset r16, -248
-; BE-NEXT:    .cfi_offset r17, -240
-; BE-NEXT:    .cfi_offset r18, -232
-; BE-NEXT:    .cfi_offset r19, -224
-; BE-NEXT:    .cfi_offset r20, -216
-; BE-NEXT:    .cfi_offset r21, -208
-; BE-NEXT:    .cfi_offset r22, -200
-; BE-NEXT:    .cfi_offset r23, -192
-; BE-NEXT:    .cfi_offset r24, -184
-; BE-NEXT:    .cfi_offset r25, -176
-; BE-NEXT:    .cfi_offset r26, -168
-; BE-NEXT:    .cfi_offset r27, -160
-; BE-NEXT:    .cfi_offset r28, -152
-; BE-NEXT:    .cfi_offset r29, -144
-; BE-NEXT:    .cfi_offset r30, -136
-; BE-NEXT:    .cfi_offset f17, -120
-; BE-NEXT:    .cfi_offset f18, -112
-; BE-NEXT:    .cfi_offset f19, -104
-; BE-NEXT:    .cfi_offset f20, -96
-; BE-NEXT:    .cfi_offset f21, -88
-; BE-NEXT:    .cfi_offset f22, -80
-; BE-NEXT:    .cfi_offset f23, -72
-; BE-NEXT:    .cfi_offset f24, -64
-; BE-NEXT:    .cfi_offset f25, -56
-; BE-NEXT:    .cfi_offset f26, -48
-; BE-NEXT:    .cfi_offset f27, -40
-; BE-NEXT:    .cfi_offset f28, -32
-; BE-NEXT:    .cfi_offset f29, -24
-; BE-NEXT:    .cfi_offset f30, -16
-; BE-NEXT:    .cfi_offset f31, -8
 ; BE-NEXT:    stfd f20, 400(r1) # 8-byte Folded Spill
 ; BE-NEXT:    fmr f20, f1
 ; BE-NEXT:    fmr f1, f2
@@ -1263,105 +1142,65 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -432(r1)
-; CHECK-NEXT:    std r0, 448(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 432
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset r16, -248
-; CHECK-NEXT:    .cfi_offset r17, -240
-; CHECK-NEXT:    .cfi_offset r18, -232
-; CHECK-NEXT:    .cfi_offset r19, -224
-; CHECK-NEXT:    .cfi_offset r20, -216
-; CHECK-NEXT:    .cfi_offset r21, -208
-; CHECK-NEXT:    .cfi_offset r22, -200
-; CHECK-NEXT:    .cfi_offset r23, -192
-; CHECK-NEXT:    .cfi_offset r24, -184
-; CHECK-NEXT:    .cfi_offset r25, -176
-; CHECK-NEXT:    .cfi_offset r26, -168
-; CHECK-NEXT:    .cfi_offset r27, -160
-; CHECK-NEXT:    .cfi_offset r28, -152
-; CHECK-NEXT:    .cfi_offset r29, -144
-; CHECK-NEXT:    .cfi_offset r30, -136
-; CHECK-NEXT:    .cfi_offset f17, -120
-; CHECK-NEXT:    .cfi_offset f18, -112
-; CHECK-NEXT:    .cfi_offset f19, -104
-; CHECK-NEXT:    .cfi_offset f20, -96
-; CHECK-NEXT:    .cfi_offset f21, -88
-; CHECK-NEXT:    .cfi_offset f22, -80
-; CHECK-NEXT:    .cfi_offset f23, -72
-; CHECK-NEXT:    .cfi_offset f24, -64
-; CHECK-NEXT:    .cfi_offset f25, -56
-; CHECK-NEXT:    .cfi_offset f26, -48
-; CHECK-NEXT:    .cfi_offset f27, -40
-; CHECK-NEXT:    .cfi_offset f28, -32
-; CHECK-NEXT:    .cfi_offset f29, -24
-; CHECK-NEXT:    .cfi_offset f30, -16
-; CHECK-NEXT:    .cfi_offset f31, -8
-; CHECK-NEXT:    .cfi_offset v24, -384
-; CHECK-NEXT:    .cfi_offset v25, -368
-; CHECK-NEXT:    .cfi_offset v26, -352
-; CHECK-NEXT:    .cfi_offset v27, -336
-; CHECK-NEXT:    .cfi_offset v28, -320
-; CHECK-NEXT:    .cfi_offset v29, -304
-; CHECK-NEXT:    .cfi_offset v30, -288
-; CHECK-NEXT:    .cfi_offset v31, -272
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    std r0, 448(r1)
 ; CHECK-NEXT:    std r16, 184(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r17, 192(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r18, 200(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r19, 208(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r20, 216(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r21, 224(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r22, 232(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v24, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
+; CHECK-NEXT:    std r22, 232(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r23, 240(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r24, 248(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r25, 256(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r26, 264(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r27, 272(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r28, 280(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 80
+; CHECK-NEXT:    std r28, 280(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r29, 288(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, 296(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f17, 312(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f18, 320(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f19, 328(r1) # 8-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    stfd f20, 336(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f20, f2
-; CHECK-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    stfd f21, 344(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f21, f3
 ; CHECK-NEXT:    stfd f22, 352(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f22, f4
+; CHECK-NEXT:    stxvd2x v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 112
 ; CHECK-NEXT:    stfd f23, 360(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f23, f5
-; CHECK-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 112
 ; CHECK-NEXT:    stfd f24, 368(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f24, f6
 ; CHECK-NEXT:    stfd f25, 376(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f25, f7
+; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 128
 ; CHECK-NEXT:    stfd f26, 384(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f26, f8
-; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 128
 ; CHECK-NEXT:    stfd f27, 392(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f27, f9
 ; CHECK-NEXT:    stfd f28, 400(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f28, f10
+; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    li r3, 144
 ; CHECK-NEXT:    stfd f29, 408(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f29, f11
-; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
-; CHECK-NEXT:    li r3, 144
 ; CHECK-NEXT:    stfd f30, 416(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f30, f12
 ; CHECK-NEXT:    stfd f31, 424(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f31, f13
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 160
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __truncsfhf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    fmr f1, f20
@@ -1564,7 +1403,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
 ; CHECK-NEXT:    vmr v4, v29
 ; CHECK-NEXT:    lfd f30, 416(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f29, 408(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 144
 ; CHECK-NEXT:    vmr v5, v28
 ; CHECK-NEXT:    vmr v6, v27
@@ -1572,7 +1411,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
 ; CHECK-NEXT:    vmr v8, v25
 ; CHECK-NEXT:    lfd f28, 400(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f27, 392(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 128
 ; CHECK-NEXT:    lfd f26, 384(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f25, 376(r1) # 8-byte Folded Reload
@@ -1580,7 +1419,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
 ; CHECK-NEXT:    lfd f24, 368(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f23, 360(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f22, 352(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 112
 ; CHECK-NEXT:    lfd f21, 344(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r30, 296(r1) # 8-byte Folded Reload
@@ -1588,7 +1427,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
 ; CHECK-NEXT:    lfd f19, 328(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r29, 288(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r28, 280(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    lfd f18, 320(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r27, 272(r1) # 8-byte Folded Reload
@@ -1596,7 +1435,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
 ; CHECK-NEXT:    ld r26, 264(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r25, 256(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r24, 248(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    ld r23, 240(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r22, 232(r1) # 8-byte Folded Reload
@@ -1604,13 +1443,13 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
 ; CHECK-NEXT:    ld r20, 216(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r19, 208(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r18, 200(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    ld r17, 192(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r16, 184(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 432
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -1619,24 +1458,6 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
 ; FAST-LABEL: lrint_v16i64_v16f16:
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
-; FAST-NEXT:    .cfi_def_cfa_offset 160
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset f16, -128
-; FAST-NEXT:    .cfi_offset f17, -120
-; FAST-NEXT:    .cfi_offset f18, -112
-; FAST-NEXT:    .cfi_offset f19, -104
-; FAST-NEXT:    .cfi_offset f20, -96
-; FAST-NEXT:    .cfi_offset f21, -88
-; FAST-NEXT:    .cfi_offset f22, -80
-; FAST-NEXT:    .cfi_offset f23, -72
-; FAST-NEXT:    .cfi_offset f24, -64
-; FAST-NEXT:    .cfi_offset f25, -56
-; FAST-NEXT:    .cfi_offset f26, -48
-; FAST-NEXT:    .cfi_offset f27, -40
-; FAST-NEXT:    .cfi_offset f28, -32
-; FAST-NEXT:    .cfi_offset f29, -24
-; FAST-NEXT:    .cfi_offset f30, -16
-; FAST-NEXT:    .cfi_offset f31, -8
 ; FAST-NEXT:    stfd f16, -128(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f17, -120(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f18, -112(r1) # 8-byte Folded Spill
@@ -1860,50 +1681,12 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) {
 }
 declare <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half>)
 
-define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
+define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; BE-LABEL: lrint_v32i64_v32f16:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -864(r1)
 ; BE-NEXT:    std r0, 880(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 864
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset r14, -288
-; BE-NEXT:    .cfi_offset r15, -280
-; BE-NEXT:    .cfi_offset r16, -272
-; BE-NEXT:    .cfi_offset r17, -264
-; BE-NEXT:    .cfi_offset r18, -256
-; BE-NEXT:    .cfi_offset r19, -248
-; BE-NEXT:    .cfi_offset r20, -240
-; BE-NEXT:    .cfi_offset r21, -232
-; BE-NEXT:    .cfi_offset r22, -224
-; BE-NEXT:    .cfi_offset r23, -216
-; BE-NEXT:    .cfi_offset r24, -208
-; BE-NEXT:    .cfi_offset r25, -200
-; BE-NEXT:    .cfi_offset r26, -192
-; BE-NEXT:    .cfi_offset r27, -184
-; BE-NEXT:    .cfi_offset r28, -176
-; BE-NEXT:    .cfi_offset r29, -168
-; BE-NEXT:    .cfi_offset r30, -160
-; BE-NEXT:    .cfi_offset r31, -152
-; BE-NEXT:    .cfi_offset f14, -144
-; BE-NEXT:    .cfi_offset f15, -136
-; BE-NEXT:    .cfi_offset f16, -128
-; BE-NEXT:    .cfi_offset f17, -120
-; BE-NEXT:    .cfi_offset f18, -112
-; BE-NEXT:    .cfi_offset f19, -104
-; BE-NEXT:    .cfi_offset f20, -96
-; BE-NEXT:    .cfi_offset f21, -88
-; BE-NEXT:    .cfi_offset f22, -80
-; BE-NEXT:    .cfi_offset f23, -72
-; BE-NEXT:    .cfi_offset f24, -64
-; BE-NEXT:    .cfi_offset f25, -56
-; BE-NEXT:    .cfi_offset f26, -48
-; BE-NEXT:    .cfi_offset f27, -40
-; BE-NEXT:    .cfi_offset f28, -32
-; BE-NEXT:    .cfi_offset f29, -24
-; BE-NEXT:    .cfi_offset f30, -16
-; BE-NEXT:    .cfi_offset f31, -8
 ; BE-NEXT:    stfd f20, 768(r1) # 8-byte Folded Spill
 ; BE-NEXT:    fmr f20, f1
 ; BE-NEXT:    fmr f1, f2
@@ -1943,6 +1726,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
 ; BE-NEXT:    stfd f30, 848(r1) # 8-byte Folded Spill
 ; BE-NEXT:    stfd f31, 856(r1) # 8-byte Folded Spill
 ; BE-NEXT:    fmr f31, f13
+; BE-NEXT:    mr r30, r3
 ; BE-NEXT:    fmr f29, f12
 ; BE-NEXT:    fmr f30, f11
 ; BE-NEXT:    fmr f28, f10
@@ -1953,7 +1737,6 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
 ; BE-NEXT:    fmr f23, f5
 ; BE-NEXT:    fmr f22, f4
 ; BE-NEXT:    fmr f21, f3
-; BE-NEXT:    mr r30, r3
 ; BE-NEXT:    bl __truncsfhf2
 ; BE-NEXT:    nop
 ; BE-NEXT:    fmr f1, f20
@@ -2456,98 +2239,48 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -688(r1)
-; CHECK-NEXT:    std r0, 704(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 688
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset r14, -288
-; CHECK-NEXT:    .cfi_offset r15, -280
-; CHECK-NEXT:    .cfi_offset r16, -272
-; CHECK-NEXT:    .cfi_offset r17, -264
-; CHECK-NEXT:    .cfi_offset r18, -256
-; CHECK-NEXT:    .cfi_offset r19, -248
-; CHECK-NEXT:    .cfi_offset r20, -240
-; CHECK-NEXT:    .cfi_offset r21, -232
-; CHECK-NEXT:    .cfi_offset r22, -224
-; CHECK-NEXT:    .cfi_offset r23, -216
-; CHECK-NEXT:    .cfi_offset r24, -208
-; CHECK-NEXT:    .cfi_offset r25, -200
-; CHECK-NEXT:    .cfi_offset r26, -192
-; CHECK-NEXT:    .cfi_offset r27, -184
-; CHECK-NEXT:    .cfi_offset r28, -176
-; CHECK-NEXT:    .cfi_offset r29, -168
-; CHECK-NEXT:    .cfi_offset r30, -160
-; CHECK-NEXT:    .cfi_offset r31, -152
-; CHECK-NEXT:    .cfi_offset f14, -144
-; CHECK-NEXT:    .cfi_offset f15, -136
-; CHECK-NEXT:    .cfi_offset f16, -128
-; CHECK-NEXT:    .cfi_offset f17, -120
-; CHECK-NEXT:    .cfi_offset f18, -112
-; CHECK-NEXT:    .cfi_offset f19, -104
-; CHECK-NEXT:    .cfi_offset f20, -96
-; CHECK-NEXT:    .cfi_offset f21, -88
-; CHECK-NEXT:    .cfi_offset f22, -80
-; CHECK-NEXT:    .cfi_offset f23, -72
-; CHECK-NEXT:    .cfi_offset f24, -64
-; CHECK-NEXT:    .cfi_offset f25, -56
-; CHECK-NEXT:    .cfi_offset f26, -48
-; CHECK-NEXT:    .cfi_offset f27, -40
-; CHECK-NEXT:    .cfi_offset f28, -32
-; CHECK-NEXT:    .cfi_offset f29, -24
-; CHECK-NEXT:    .cfi_offset f30, -16
-; CHECK-NEXT:    .cfi_offset f31, -8
-; CHECK-NEXT:    .cfi_offset v20, -480
-; CHECK-NEXT:    .cfi_offset v21, -464
-; CHECK-NEXT:    .cfi_offset v22, -448
-; CHECK-NEXT:    .cfi_offset v23, -432
-; CHECK-NEXT:    .cfi_offset v24, -416
-; CHECK-NEXT:    .cfi_offset v25, -400
-; CHECK-NEXT:    .cfi_offset v26, -384
-; CHECK-NEXT:    .cfi_offset v27, -368
-; CHECK-NEXT:    .cfi_offset v28, -352
-; CHECK-NEXT:    .cfi_offset v29, -336
-; CHECK-NEXT:    .cfi_offset v30, -320
-; CHECK-NEXT:    .cfi_offset v31, -304
 ; CHECK-NEXT:    li r4, 208
+; CHECK-NEXT:    std r0, 704(r1)
 ; CHECK-NEXT:    std r14, 400(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r15, 408(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r16, 416(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r17, 424(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r18, 432(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r19, 440(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r20, 448(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stvx v20, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 224
+; CHECK-NEXT:    std r20, 448(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r21, 456(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r22, 464(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r23, 472(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r24, 480(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r25, 488(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    std r26, 496(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stvx v21, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 240
+; CHECK-NEXT:    std r26, 496(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r27, 504(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r28, 512(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r29, 520(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, 528(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    mr r30, r3
-; CHECK-NEXT:    std r31, 536(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stvx v22, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 256
+; CHECK-NEXT:    std r31, 536(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f14, 544(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f15, 552(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f16, 560(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f17, 568(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f18, 576(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd f19, 584(r1) # 8-byte Folded Spill
-; CHECK-NEXT:    stvx v23, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 272
+; CHECK-NEXT:    stfd f19, 584(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    stfd f20, 592(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f20, f2
 ; CHECK-NEXT:    stfd f21, 600(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f21, f3
 ; CHECK-NEXT:    stfd f22, 608(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f22, f4
-; CHECK-NEXT:    stvx v24, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 288
 ; CHECK-NEXT:    stfd f23, 616(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f23, f5
@@ -2555,7 +2288,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    fmr f24, f6
 ; CHECK-NEXT:    stfd f25, 632(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f25, f7
-; CHECK-NEXT:    stvx v25, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 304
 ; CHECK-NEXT:    stfd f26, 640(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f26, f8
@@ -2563,7 +2296,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    fmr f27, f9
 ; CHECK-NEXT:    stfd f28, 656(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f28, f10
-; CHECK-NEXT:    stvx v26, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 320
 ; CHECK-NEXT:    stfd f29, 664(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f29, f11
@@ -2571,15 +2304,15 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    fmr f30, f12
 ; CHECK-NEXT:    stfd f31, 680(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    fmr f31, f13
-; CHECK-NEXT:    stvx v27, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 336
-; CHECK-NEXT:    stvx v28, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 352
-; CHECK-NEXT:    stvx v29, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 368
-; CHECK-NEXT:    stvx v30, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 384
-; CHECK-NEXT:    stvx v31, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    bl __truncsfhf2
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    fmr f1, f20
@@ -3058,7 +2791,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    li r3, 384
 ; CHECK-NEXT:    xxswapd vs4, vs4
 ; CHECK-NEXT:    stxvd2x vs4, 0, r30
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 368
 ; CHECK-NEXT:    lfd f31, 680(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f30, 672(r1) # 8-byte Folded Reload
@@ -3076,7 +2809,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    lfd f18, 576(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f17, 568(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f16, 560(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 352
 ; CHECK-NEXT:    lfd f15, 552(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    lfd f14, 544(r1) # 8-byte Folded Reload
@@ -3084,7 +2817,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    ld r30, 528(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r29, 520(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r28, 512(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 336
 ; CHECK-NEXT:    ld r27, 504(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r26, 496(r1) # 8-byte Folded Reload
@@ -3092,7 +2825,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    ld r24, 480(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r23, 472(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r22, 464(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 320
 ; CHECK-NEXT:    ld r21, 456(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r20, 448(r1) # 8-byte Folded Reload
@@ -3100,23 +2833,23 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
 ; CHECK-NEXT:    ld r18, 432(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r17, 424(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r16, 416(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 304
 ; CHECK-NEXT:    ld r15, 408(r1) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld r14, 400(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 288
-; CHECK-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 272
-; CHECK-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 256
-; CHECK-NEXT:    lvx v23, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v23, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 240
-; CHECK-NEXT:    lvx v22, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v22, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 224
-; CHECK-NEXT:    lvx v21, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v21, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 208
-; CHECK-NEXT:    lvx v20, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v20, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 688
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -3126,95 +2859,62 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -480(r1)
-; FAST-NEXT:    std r0, 496(r1)
-; FAST-NEXT:    .cfi_def_cfa_offset 480
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset r30, -160
-; FAST-NEXT:    .cfi_offset f14, -144
-; FAST-NEXT:    .cfi_offset f15, -136
-; FAST-NEXT:    .cfi_offset f16, -128
-; FAST-NEXT:    .cfi_offset f17, -120
-; FAST-NEXT:    .cfi_offset f18, -112
-; FAST-NEXT:    .cfi_offset f19, -104
-; FAST-NEXT:    .cfi_offset f20, -96
-; FAST-NEXT:    .cfi_offset f21, -88
-; FAST-NEXT:    .cfi_offset f22, -80
-; FAST-NEXT:    .cfi_offset f23, -72
-; FAST-NEXT:    .cfi_offset f24, -64
-; FAST-NEXT:    .cfi_offset f25, -56
-; FAST-NEXT:    .cfi_offset f26, -48
-; FAST-NEXT:    .cfi_offset f27, -40
-; FAST-NEXT:    .cfi_offset f28, -32
-; FAST-NEXT:    .cfi_offset f29, -24
-; FAST-NEXT:    .cfi_offset f30, -16
-; FAST-NEXT:    .cfi_offset f31, -8
-; FAST-NEXT:    .cfi_offset v20, -352
-; FAST-NEXT:    .cfi_offset v21, -336
-; FAST-NEXT:    .cfi_offset v22, -320
-; FAST-NEXT:    .cfi_offset v23, -304
-; FAST-NEXT:    .cfi_offset v24, -288
-; FAST-NEXT:    .cfi_offset v25, -272
-; FAST-NEXT:    .cfi_offset v26, -256
-; FAST-NEXT:    .cfi_offset v27, -240
-; FAST-NEXT:    .cfi_offset v28, -224
-; FAST-NEXT:    .cfi_offset v29, -208
-; FAST-NEXT:    .cfi_offset v30, -192
-; FAST-NEXT:    .cfi_offset v31, -176
 ; FAST-NEXT:    li r4, 128
+; FAST-NEXT:    std r0, 496(r1)
 ; FAST-NEXT:    std r30, 320(r1) # 8-byte Folded Spill
+; FAST-NEXT:    mr r30, r3
 ; FAST-NEXT:    stfd f14, 336(r1) # 8-byte Folded Spill
-; FAST-NEXT:    fmr f14, f5
 ; FAST-NEXT:    stfd f15, 344(r1) # 8-byte Folded Spill
+; FAST-NEXT:    fmr f14, f5
 ; FAST-NEXT:    stfd f16, 352(r1) # 8-byte Folded Spill
-; FAST-NEXT:    fmr f16, f4
-; FAST-NEXT:    mr r30, r3
-; FAST-NEXT:    stvx v20, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 144
+; FAST-NEXT:    fmr f16, f4
 ; FAST-NEXT:    stfd f17, 360(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f18, 368(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f19, 376(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f20, 384(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f21, 392(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f22, 400(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stvx v21, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 160
+; FAST-NEXT:    stfd f22, 400(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f23, 408(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f24, 416(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f25, 424(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f26, 432(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f27, 440(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stfd f28, 448(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stvx v22, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 176
 ; FAST-NEXT:    xxlor v22, f3, f3
+; FAST-NEXT:    stfd f28, 448(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f29, 456(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    fmr f29, f9
 ; FAST-NEXT:    stfd f30, 464(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    stfd f31, 472(r1) # 8-byte Folded Spill
-; FAST-NEXT:    stvx v23, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 192
 ; FAST-NEXT:    xxlor v23, f2, f2
-; FAST-NEXT:    stvx v24, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 208
-; FAST-NEXT:    stvx v25, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 224
 ; FAST-NEXT:    xxlor v25, f13, f13
-; FAST-NEXT:    stvx v26, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 240
 ; FAST-NEXT:    xxlor v26, f12, f12
-; FAST-NEXT:    stvx v27, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 256
 ; FAST-NEXT:    xxlor v27, f11, f11
-; FAST-NEXT:    stvx v28, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 272
 ; FAST-NEXT:    xxlor v28, f10, f10
-; FAST-NEXT:    stvx v29, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 288
 ; FAST-NEXT:    xxlor v29, f8, f8
-; FAST-NEXT:    stvx v30, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 304
 ; FAST-NEXT:    xxlor v30, f7, f7
-; FAST-NEXT:    stvx v31, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 44
 ; FAST-NEXT:    xxlor v31, f6, f6
 ; FAST-NEXT:    stxsspx f1, r1, r4 # 4-byte Folded Spill
@@ -3643,30 +3343,30 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
 ; FAST-NEXT:    lfd f16, 352(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    lfd f15, 344(r1) # 8-byte Folded Reload
 ; FAST-NEXT:    lfd f14, 336(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 288
 ; FAST-NEXT:    ld r30, 320(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 272
-; FAST-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 256
-; FAST-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 240
-; FAST-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 224
-; FAST-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 208
-; FAST-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 192
-; FAST-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 176
-; FAST-NEXT:    lvx v23, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v23, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 160
-; FAST-NEXT:    lvx v22, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v22, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 144
-; FAST-NEXT:    lvx v21, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v21, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 128
-; FAST-NEXT:    lvx v20, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v20, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    addi r1, r1, 480
 ; FAST-NEXT:    ld r0, 16(r1)
 ; FAST-NEXT:    mtlr r0
@@ -3676,14 +3376,12 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) {
 }
 declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>)
 
-define <1 x i64> @lrint_v1f32(<1 x float> %x) {
+define <1 x i64> @lrint_v1f32(<1 x float> %x) nounwind {
 ; BE-LABEL: lrint_v1f32:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -112(r1)
 ; BE-NEXT:    std r0, 128(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 112
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl lrintf
 ; BE-NEXT:    nop
 ; BE-NEXT:    addi r1, r1, 112
@@ -3696,8 +3394,6 @@ define <1 x i64> @lrint_v1f32(<1 x float> %x) {
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -32(r1)
 ; CHECK-NEXT:    std r0, 48(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addi r1, r1, 32
@@ -3715,15 +3411,13 @@ define <1 x i64> @lrint_v1f32(<1 x float> %x) {
 }
 declare <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float>)
 
-define <2 x i64> @lrint_v2f32(<2 x float> %x) {
+define <2 x i64> @lrint_v2f32(<2 x float> %x) nounwind {
 ; BE-LABEL: lrint_v2f32:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -144(r1)
-; BE-NEXT:    std r0, 160(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 144
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    addi r3, r1, 112
+; BE-NEXT:    std r0, 160(r1)
 ; BE-NEXT:    stxvw4x v2, 0, r3
 ; BE-NEXT:    lfs f1, 116(r1)
 ; BE-NEXT:    bl lrintf
@@ -3744,14 +3438,11 @@ define <2 x i64> @lrint_v2f32(<2 x float> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -64(r1)
-; CHECK-NEXT:    std r0, 80(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    std r0, 80(r1)
 ; CHECK-NEXT:    xscvspdpn f1, vs0
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v2
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
@@ -3763,7 +3454,7 @@ define <2 x i64> @lrint_v2f32(<2 x float> %x) {
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 48
 ; CHECK-NEXT:    xxmrghd v2, vs0, v31
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 64
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -3788,15 +3479,13 @@ define <2 x i64> @lrint_v2f32(<2 x float> %x) {
 }
 declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float>)
 
-define <4 x i64> @lrint_v4f32(<4 x float> %x) {
+define <4 x i64> @lrint_v4f32(<4 x float> %x) nounwind {
 ; BE-LABEL: lrint_v4f32:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -160(r1)
-; BE-NEXT:    std r0, 176(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 160
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    addi r3, r1, 112
+; BE-NEXT:    std r0, 176(r1)
 ; BE-NEXT:    stxvw4x v2, 0, r3
 ; BE-NEXT:    lfs f1, 116(r1)
 ; BE-NEXT:    bl lrintf
@@ -3827,17 +3516,13 @@ define <4 x i64> @lrint_v4f32(<4 x float> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -80(r1)
-; CHECK-NEXT:    std r0, 96(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    xxsldwi vs0, v2, v2, 3
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    std r0, 96(r1)
 ; CHECK-NEXT:    xscvspdpn f1, vs0
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v2
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
@@ -3860,9 +3545,9 @@ define <4 x i64> @lrint_v4f32(<4 x float> %x) {
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    vmr v2, v30
 ; CHECK-NEXT:    xxmrghd v3, v31, vs0
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 80
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -3898,15 +3583,13 @@ define <4 x i64> @lrint_v4f32(<4 x float> %x) {
 }
 declare <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float>)
 
-define <8 x i64> @lrint_v8f32(<8 x float> %x) {
+define <8 x i64> @lrint_v8f32(<8 x float> %x) nounwind {
 ; BE-LABEL: lrint_v8f32:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -208(r1)
-; BE-NEXT:    std r0, 224(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 208
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    addi r3, r1, 112
+; BE-NEXT:    std r0, 224(r1)
 ; BE-NEXT:    stxvw4x v2, 0, r3
 ; BE-NEXT:    addi r3, r1, 128
 ; BE-NEXT:    stxvw4x v3, 0, r3
@@ -3959,24 +3642,18 @@ define <8 x i64> @lrint_v8f32(<8 x float> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -112(r1)
-; CHECK-NEXT:    std r0, 128(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v28, -64
-; CHECK-NEXT:    .cfi_offset v29, -48
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
 ; CHECK-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    std r0, 128(r1)
+; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    xscvspdpn f1, vs0
-; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    vmr v30, v2
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v3
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
@@ -4022,13 +3699,13 @@ define <8 x i64> @lrint_v8f32(<8 x float> %x) {
 ; CHECK-NEXT:    vmr v2, v29
 ; CHECK-NEXT:    vmr v4, v28
 ; CHECK-NEXT:    xxmrghd v5, v31, vs0
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 112
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -4086,15 +3763,13 @@ define <8 x i64> @lrint_v8f32(<8 x float> %x) {
 }
 declare <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float>)
 
-define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) {
+define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) nounwind {
 ; BE-LABEL: lrint_v16i64_v16f32:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -304(r1)
-; BE-NEXT:    std r0, 320(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 304
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    addi r3, r1, 112
+; BE-NEXT:    std r0, 320(r1)
 ; BE-NEXT:    stxvw4x v2, 0, r3
 ; BE-NEXT:    addi r3, r1, 128
 ; BE-NEXT:    stxvw4x v3, 0, r3
@@ -4191,38 +3866,28 @@ define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -176(r1)
-; CHECK-NEXT:    std r0, 192(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 176
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v24, -128
-; CHECK-NEXT:    .cfi_offset v25, -112
-; CHECK-NEXT:    .cfi_offset v26, -96
-; CHECK-NEXT:    .cfi_offset v27, -80
-; CHECK-NEXT:    .cfi_offset v28, -64
-; CHECK-NEXT:    .cfi_offset v29, -48
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
+; CHECK-NEXT:    std r0, 192(r1)
 ; CHECK-NEXT:    xxsldwi vs0, v2, v2, 3
-; CHECK-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v24, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    xscvspdpn f1, vs0
-; CHECK-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v26, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    vmr v26, v3
-; CHECK-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v27, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 112
-; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 128
 ; CHECK-NEXT:    vmr v28, v4
-; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 144
 ; CHECK-NEXT:    vmr v29, v2
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 160
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v5
 ; CHECK-NEXT:    bl lrintf
 ; CHECK-NEXT:    nop
@@ -4314,21 +3979,21 @@ define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) {
 ; CHECK-NEXT:    vmr v6, v25
 ; CHECK-NEXT:    vmr v8, v24
 ; CHECK-NEXT:    xxmrghd v9, v31, vs0
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 144
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 128
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 112
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 96
-; CHECK-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 176
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -4430,14 +4095,12 @@ define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) {
 }
 declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>)
 
-define <1 x i64> @lrint_v1f64(<1 x double> %x) {
+define <1 x i64> @lrint_v1f64(<1 x double> %x) nounwind {
 ; BE-LABEL: lrint_v1f64:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -112(r1)
 ; BE-NEXT:    std r0, 128(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 112
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl lrint
 ; BE-NEXT:    nop
 ; BE-NEXT:    addi r1, r1, 112
@@ -4450,8 +4113,6 @@ define <1 x i64> @lrint_v1f64(<1 x double> %x) {
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -32(r1)
 ; CHECK-NEXT:    std r0, 48(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl lrint
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addi r1, r1, 32
@@ -4469,16 +4130,13 @@ define <1 x i64> @lrint_v1f64(<1 x double> %x) {
 }
 declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>)
 
-define <2 x i64> @lrint_v2f64(<2 x double> %x) {
+define <2 x i64> @lrint_v2f64(<2 x double> %x) nounwind {
 ; BE-LABEL: lrint_v2f64:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -160(r1)
-; BE-NEXT:    std r0, 176(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 160
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset v31, -16
 ; BE-NEXT:    li r3, 144
+; BE-NEXT:    std r0, 176(r1)
 ; BE-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    vmr v31, v2
 ; BE-NEXT:    xxlor f1, v31, v31
@@ -4502,12 +4160,9 @@ define <2 x i64> @lrint_v2f64(<2 x double> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -64(r1)
-; CHECK-NEXT:    std r0, 80(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 64
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    std r0, 80(r1)
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v2
 ; CHECK-NEXT:    xxlor f1, v31, v31
 ; CHECK-NEXT:    bl lrint
@@ -4519,7 +4174,7 @@ define <2 x i64> @lrint_v2f64(<2 x double> %x) {
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 48
 ; CHECK-NEXT:    xxmrghd v2, v31, vs0
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 64
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -4542,17 +4197,13 @@ define <2 x i64> @lrint_v2f64(<2 x double> %x) {
 }
 declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>)
 
-define <4 x i64> @lrint_v4f64(<4 x double> %x) {
+define <4 x i64> @lrint_v4f64(<4 x double> %x) nounwind {
 ; BE-LABEL: lrint_v4f64:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -192(r1)
-; BE-NEXT:    std r0, 208(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 192
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset v30, -32
-; BE-NEXT:    .cfi_offset v31, -16
 ; BE-NEXT:    li r3, 160
+; BE-NEXT:    std r0, 208(r1)
 ; BE-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    vmr v30, v2
 ; BE-NEXT:    li r3, 176
@@ -4591,17 +4242,13 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -80(r1)
-; CHECK-NEXT:    std r0, 96(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    std r0, 96(r1)
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v30, v2
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    xxlor f1, v30, v30
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v3
 ; CHECK-NEXT:    bl lrint
 ; CHECK-NEXT:    nop
@@ -4622,9 +4269,9 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) {
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    vmr v2, v30
 ; CHECK-NEXT:    xxmrghd v3, v31, vs0
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 80
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -4656,25 +4303,19 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) {
 }
 declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>)
 
-define <8 x i64> @lrint_v8f64(<8 x double> %x) {
+define <8 x i64> @lrint_v8f64(<8 x double> %x) nounwind {
 ; BE-LABEL: lrint_v8f64:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -256(r1)
-; BE-NEXT:    std r0, 272(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 256
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset v28, -64
-; BE-NEXT:    .cfi_offset v29, -48
-; BE-NEXT:    .cfi_offset v30, -32
-; BE-NEXT:    .cfi_offset v31, -16
 ; BE-NEXT:    li r3, 192
+; BE-NEXT:    std r0, 272(r1)
 ; BE-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    li r3, 208
 ; BE-NEXT:    vmr v28, v2
-; BE-NEXT:    xxlor f1, v28, v28
 ; BE-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    li r3, 224
+; BE-NEXT:    xxlor f1, v28, v28
 ; BE-NEXT:    vmr v29, v3
 ; BE-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    li r3, 240
@@ -4737,25 +4378,19 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -112(r1)
-; CHECK-NEXT:    std r0, 128(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v28, -64
-; CHECK-NEXT:    .cfi_offset v29, -48
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    std r0, 128(r1)
+; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    vmr v28, v2
-; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    xxlor f1, v28, v28
 ; CHECK-NEXT:    vmr v29, v3
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    vmr v30, v4
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v5
 ; CHECK-NEXT:    bl lrint
 ; CHECK-NEXT:    nop
@@ -4796,13 +4431,13 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) {
 ; CHECK-NEXT:    vmr v3, v29
 ; CHECK-NEXT:    vmr v2, v28
 ; CHECK-NEXT:    xxmrghd v5, v31, vs0
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 112
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -4852,14 +4487,12 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) {
 }
 declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>)
 
-define <1 x i64> @lrint_v1f128(<1 x fp128> %x) {
+define <1 x i64> @lrint_v1f128(<1 x fp128> %x) nounwind {
 ; BE-LABEL: lrint_v1f128:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -112(r1)
 ; BE-NEXT:    std r0, 128(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 112
-; BE-NEXT:    .cfi_offset lr, 16
 ; BE-NEXT:    bl lrintf128
 ; BE-NEXT:    nop
 ; BE-NEXT:    addi r1, r1, 112
@@ -4872,8 +4505,6 @@ define <1 x i64> @lrint_v1f128(<1 x fp128> %x) {
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -32(r1)
 ; CHECK-NEXT:    std r0, 48(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset lr, 16
 ; CHECK-NEXT:    bl lrintf128
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    addi r1, r1, 32
@@ -4886,8 +4517,6 @@ define <1 x i64> @lrint_v1f128(<1 x fp128> %x) {
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -32(r1)
 ; FAST-NEXT:    std r0, 48(r1)
-; FAST-NEXT:    .cfi_def_cfa_offset 32
-; FAST-NEXT:    .cfi_offset lr, 16
 ; FAST-NEXT:    bl lrintf128
 ; FAST-NEXT:    nop
 ; FAST-NEXT:    addi r1, r1, 32
@@ -4899,16 +4528,13 @@ define <1 x i64> @lrint_v1f128(<1 x fp128> %x) {
 }
 declare <1 x i64> @llvm.lrint.v1i64.v1f128(<1 x fp128>)
 
-define <2 x i64> @lrint_v2f128(<2 x fp128> %x) {
+define <2 x i64> @lrint_v2f128(<2 x fp128> %x) nounwind {
 ; BE-LABEL: lrint_v2f128:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -160(r1)
-; BE-NEXT:    std r0, 176(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 160
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset v31, -16
 ; BE-NEXT:    li r3, 144
+; BE-NEXT:    std r0, 176(r1)
 ; BE-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    vmr v31, v2
 ; BE-NEXT:    vmr v2, v3
@@ -4932,15 +4558,11 @@ define <2 x i64> @lrint_v2f128(<2 x fp128> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -80(r1)
-; CHECK-NEXT:    std r0, 96(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 80
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    std r0, 96(r1)
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v3
 ; CHECK-NEXT:    bl lrintf128
 ; CHECK-NEXT:    nop
@@ -4950,10 +4572,10 @@ define <2 x i64> @lrint_v2f128(<2 x fp128> %x) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
 ; CHECK-NEXT:    xxmrghd v2, vs0, v30
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 80
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -4963,15 +4585,11 @@ define <2 x i64> @lrint_v2f128(<2 x fp128> %x) {
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -80(r1)
-; FAST-NEXT:    std r0, 96(r1)
-; FAST-NEXT:    .cfi_def_cfa_offset 80
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset v30, -32
-; FAST-NEXT:    .cfi_offset v31, -16
 ; FAST-NEXT:    li r3, 48
-; FAST-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    std r0, 96(r1)
+; FAST-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    vmr v31, v3
 ; FAST-NEXT:    bl lrintf128
 ; FAST-NEXT:    nop
@@ -4981,10 +4599,10 @@ define <2 x i64> @lrint_v2f128(<2 x fp128> %x) {
 ; FAST-NEXT:    nop
 ; FAST-NEXT:    mtfprd f0, r3
 ; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 48
 ; FAST-NEXT:    xxmrghd v2, vs0, v30
-; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    addi r1, r1, 80
 ; FAST-NEXT:    ld r0, 16(r1)
 ; FAST-NEXT:    mtlr r0
@@ -4994,18 +4612,13 @@ define <2 x i64> @lrint_v2f128(<2 x fp128> %x) {
 }
 declare <2 x i64> @llvm.lrint.v2i64.v2f128(<2 x fp128>)
 
-define <4 x i64> @lrint_v4f128(<4 x fp128> %x) {
+define <4 x i64> @lrint_v4f128(<4 x fp128> %x) nounwind {
 ; BE-LABEL: lrint_v4f128:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -208(r1)
-; BE-NEXT:    std r0, 224(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 208
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset v29, -48
-; BE-NEXT:    .cfi_offset v30, -32
-; BE-NEXT:    .cfi_offset v31, -16
 ; BE-NEXT:    li r3, 160
+; BE-NEXT:    std r0, 224(r1)
 ; BE-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    li r3, 176
 ; BE-NEXT:    vmr v29, v2
@@ -5049,23 +4662,17 @@ define <4 x i64> @lrint_v4f128(<4 x fp128> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -112(r1)
-; CHECK-NEXT:    std r0, 128(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 112
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v28, -64
-; CHECK-NEXT:    .cfi_offset v29, -48
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    std r0, 128(r1)
+; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    vmr v29, v3
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    vmr v30, v4
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v5
 ; CHECK-NEXT:    bl lrintf128
 ; CHECK-NEXT:    nop
@@ -5085,14 +4692,14 @@ define <4 x i64> @lrint_v4f128(<4 x fp128> %x) {
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    vmr v2, v29
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    xxmrghd v3, vs0, v30
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 112
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -5102,23 +4709,17 @@ define <4 x i64> @lrint_v4f128(<4 x fp128> %x) {
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -112(r1)
-; FAST-NEXT:    std r0, 128(r1)
-; FAST-NEXT:    .cfi_def_cfa_offset 112
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset v28, -64
-; FAST-NEXT:    .cfi_offset v29, -48
-; FAST-NEXT:    .cfi_offset v30, -32
-; FAST-NEXT:    .cfi_offset v31, -16
 ; FAST-NEXT:    li r3, 48
-; FAST-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    std r0, 128(r1)
+; FAST-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 80
 ; FAST-NEXT:    vmr v29, v3
-; FAST-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 96
 ; FAST-NEXT:    vmr v30, v4
-; FAST-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    vmr v31, v5
 ; FAST-NEXT:    bl lrintf128
 ; FAST-NEXT:    nop
@@ -5138,14 +4739,14 @@ define <4 x i64> @lrint_v4f128(<4 x fp128> %x) {
 ; FAST-NEXT:    mtfprd f0, r3
 ; FAST-NEXT:    li r3, 96
 ; FAST-NEXT:    vmr v2, v29
-; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 80
 ; FAST-NEXT:    xxmrghd v3, vs0, v30
-; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 48
-; FAST-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    addi r1, r1, 112
 ; FAST-NEXT:    ld r0, 16(r1)
 ; FAST-NEXT:    mtlr r0
@@ -5155,22 +4756,13 @@ define <4 x i64> @lrint_v4f128(<4 x fp128> %x) {
 }
 declare <4 x i64> @llvm.lrint.v4i64.v4f128(<4 x fp128>)
 
-define <8 x i64> @lrint_v8f128(<8 x fp128> %x) {
+define <8 x i64> @lrint_v8f128(<8 x fp128> %x) nounwind {
 ; BE-LABEL: lrint_v8f128:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -304(r1)
-; BE-NEXT:    std r0, 320(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 304
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset v25, -112
-; BE-NEXT:    .cfi_offset v26, -96
-; BE-NEXT:    .cfi_offset v27, -80
-; BE-NEXT:    .cfi_offset v28, -64
-; BE-NEXT:    .cfi_offset v29, -48
-; BE-NEXT:    .cfi_offset v30, -32
-; BE-NEXT:    .cfi_offset v31, -16
 ; BE-NEXT:    li r3, 192
+; BE-NEXT:    std r0, 320(r1)
 ; BE-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    li r3, 208
 ; BE-NEXT:    vmr v25, v2
@@ -5254,39 +4846,29 @@ define <8 x i64> @lrint_v8f128(<8 x fp128> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -176(r1)
-; CHECK-NEXT:    std r0, 192(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 176
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v24, -128
-; CHECK-NEXT:    .cfi_offset v25, -112
-; CHECK-NEXT:    .cfi_offset v26, -96
-; CHECK-NEXT:    .cfi_offset v27, -80
-; CHECK-NEXT:    .cfi_offset v28, -64
-; CHECK-NEXT:    .cfi_offset v29, -48
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    std r0, 192(r1)
+; CHECK-NEXT:    stxvd2x v24, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 80
 ; CHECK-NEXT:    vmr v25, v3
-; CHECK-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v26, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 96
 ; CHECK-NEXT:    vmr v26, v4
-; CHECK-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v27, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 112
 ; CHECK-NEXT:    vmr v27, v5
-; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 128
 ; CHECK-NEXT:    vmr v28, v6
-; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 144
 ; CHECK-NEXT:    vmr v29, v7
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 160
 ; CHECK-NEXT:    vmr v30, v8
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v9
 ; CHECK-NEXT:    bl lrintf128
 ; CHECK-NEXT:    nop
@@ -5324,24 +4906,24 @@ define <8 x i64> @lrint_v8f128(<8 x fp128> %x) {
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 160
 ; CHECK-NEXT:    vmr v4, v29
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 144
 ; CHECK-NEXT:    vmr v3, v27
 ; CHECK-NEXT:    vmr v2, v25
 ; CHECK-NEXT:    xxmrghd v5, vs0, v30
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 128
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 112
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 96
-; CHECK-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 80
-; CHECK-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 64
-; CHECK-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 48
-; CHECK-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 176
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -5351,39 +4933,29 @@ define <8 x i64> @lrint_v8f128(<8 x fp128> %x) {
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -176(r1)
-; FAST-NEXT:    std r0, 192(r1)
-; FAST-NEXT:    .cfi_def_cfa_offset 176
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset v24, -128
-; FAST-NEXT:    .cfi_offset v25, -112
-; FAST-NEXT:    .cfi_offset v26, -96
-; FAST-NEXT:    .cfi_offset v27, -80
-; FAST-NEXT:    .cfi_offset v28, -64
-; FAST-NEXT:    .cfi_offset v29, -48
-; FAST-NEXT:    .cfi_offset v30, -32
-; FAST-NEXT:    .cfi_offset v31, -16
 ; FAST-NEXT:    li r3, 48
-; FAST-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    std r0, 192(r1)
+; FAST-NEXT:    stxvd2x v24, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 80
 ; FAST-NEXT:    vmr v25, v3
-; FAST-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v26, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 96
 ; FAST-NEXT:    vmr v26, v4
-; FAST-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v27, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 112
 ; FAST-NEXT:    vmr v27, v5
-; FAST-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 128
 ; FAST-NEXT:    vmr v28, v6
-; FAST-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 144
 ; FAST-NEXT:    vmr v29, v7
-; FAST-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 160
 ; FAST-NEXT:    vmr v30, v8
-; FAST-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    vmr v31, v9
 ; FAST-NEXT:    bl lrintf128
 ; FAST-NEXT:    nop
@@ -5421,24 +4993,24 @@ define <8 x i64> @lrint_v8f128(<8 x fp128> %x) {
 ; FAST-NEXT:    mtfprd f0, r3
 ; FAST-NEXT:    li r3, 160
 ; FAST-NEXT:    vmr v4, v29
-; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 144
 ; FAST-NEXT:    vmr v3, v27
 ; FAST-NEXT:    vmr v2, v25
 ; FAST-NEXT:    xxmrghd v5, vs0, v30
-; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 128
-; FAST-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 112
-; FAST-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 96
-; FAST-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 80
-; FAST-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 64
-; FAST-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 48
-; FAST-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    addi r1, r1, 176
 ; FAST-NEXT:    ld r0, 16(r1)
 ; FAST-NEXT:    mtlr r0
@@ -5448,27 +5020,13 @@ define <8 x i64> @lrint_v8f128(<8 x fp128> %x) {
 }
 declare <8 x i64> @llvm.lrint.v8i64.v8f128(<8 x fp128>)
 
-define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) {
+define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) nounwind {
 ; BE-LABEL: lrint_v16i64_v16f128:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -496(r1)
-; BE-NEXT:    std r0, 512(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 496
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset v20, -192
-; BE-NEXT:    .cfi_offset v21, -176
-; BE-NEXT:    .cfi_offset v22, -160
-; BE-NEXT:    .cfi_offset v23, -144
-; BE-NEXT:    .cfi_offset v24, -128
-; BE-NEXT:    .cfi_offset v25, -112
-; BE-NEXT:    .cfi_offset v26, -96
-; BE-NEXT:    .cfi_offset v27, -80
-; BE-NEXT:    .cfi_offset v28, -64
-; BE-NEXT:    .cfi_offset v29, -48
-; BE-NEXT:    .cfi_offset v30, -32
-; BE-NEXT:    .cfi_offset v31, -16
 ; BE-NEXT:    li r3, 304
+; BE-NEXT:    std r0, 512(r1)
 ; BE-NEXT:    stxvd2x v20, r1, r3 # 16-byte Folded Spill
 ; BE-NEXT:    li r3, 320
 ; BE-NEXT:    stxvd2x v21, r1, r3 # 16-byte Folded Spill
@@ -5632,55 +5190,41 @@ define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -304(r1)
-; CHECK-NEXT:    std r0, 320(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 304
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v20, -192
-; CHECK-NEXT:    .cfi_offset v21, -176
-; CHECK-NEXT:    .cfi_offset v22, -160
-; CHECK-NEXT:    .cfi_offset v23, -144
-; CHECK-NEXT:    .cfi_offset v24, -128
-; CHECK-NEXT:    .cfi_offset v25, -112
-; CHECK-NEXT:    .cfi_offset v26, -96
-; CHECK-NEXT:    .cfi_offset v27, -80
-; CHECK-NEXT:    .cfi_offset v28, -64
-; CHECK-NEXT:    .cfi_offset v29, -48
-; CHECK-NEXT:    .cfi_offset v30, -32
-; CHECK-NEXT:    .cfi_offset v31, -16
 ; CHECK-NEXT:    li r3, 112
-; CHECK-NEXT:    stvx v20, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    std r0, 320(r1)
+; CHECK-NEXT:    stxvd2x v20, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 128
-; CHECK-NEXT:    stvx v21, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v21, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 144
 ; CHECK-NEXT:    vmr v21, v4
-; CHECK-NEXT:    stvx v22, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v22, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 160
 ; CHECK-NEXT:    vmr v22, v6
-; CHECK-NEXT:    stvx v23, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v23, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 176
 ; CHECK-NEXT:    vmr v23, v8
-; CHECK-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v24, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 192
 ; CHECK-NEXT:    vmr v24, v9
-; CHECK-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 208
 ; CHECK-NEXT:    vmr v25, v7
-; CHECK-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v26, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 224
 ; CHECK-NEXT:    vmr v26, v10
-; CHECK-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v27, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 240
 ; CHECK-NEXT:    vmr v27, v5
-; CHECK-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 256
 ; CHECK-NEXT:    vmr v28, v11
-; CHECK-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 272
 ; CHECK-NEXT:    vmr v29, v12
-; CHECK-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 288
 ; CHECK-NEXT:    vmr v30, v3
-; CHECK-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r3, 64
 ; CHECK-NEXT:    stxvd2x v13, r1, r3 # 16-byte Folded Spill
 ; CHECK-NEXT:    addi r3, r1, 576
@@ -5777,36 +5321,36 @@ define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) {
 ; CHECK-NEXT:    mtfprd f0, r3
 ; CHECK-NEXT:    li r3, 288
 ; CHECK-NEXT:    vmr v8, v31
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 272
 ; CHECK-NEXT:    vmr v2, v30
 ; CHECK-NEXT:    vmr v7, v29
 ; CHECK-NEXT:    vmr v6, v28
 ; CHECK-NEXT:    vmr v3, v27
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 256
 ; CHECK-NEXT:    vmr v4, v25
 ; CHECK-NEXT:    vmr v5, v24
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 240
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 224
-; CHECK-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 208
 ; CHECK-NEXT:    xxmrghd v9, vs0, v26
-; CHECK-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 192
-; CHECK-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 176
-; CHECK-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 160
-; CHECK-NEXT:    lvx v23, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v23, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 144
-; CHECK-NEXT:    lvx v22, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v22, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 128
-; CHECK-NEXT:    lvx v21, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v21, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 112
-; CHECK-NEXT:    lvx v20, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v20, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 304
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -5816,55 +5360,41 @@ define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) {
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -304(r1)
-; FAST-NEXT:    std r0, 320(r1)
-; FAST-NEXT:    .cfi_def_cfa_offset 304
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset v20, -192
-; FAST-NEXT:    .cfi_offset v21, -176
-; FAST-NEXT:    .cfi_offset v22, -160
-; FAST-NEXT:    .cfi_offset v23, -144
-; FAST-NEXT:    .cfi_offset v24, -128
-; FAST-NEXT:    .cfi_offset v25, -112
-; FAST-NEXT:    .cfi_offset v26, -96
-; FAST-NEXT:    .cfi_offset v27, -80
-; FAST-NEXT:    .cfi_offset v28, -64
-; FAST-NEXT:    .cfi_offset v29, -48
-; FAST-NEXT:    .cfi_offset v30, -32
-; FAST-NEXT:    .cfi_offset v31, -16
 ; FAST-NEXT:    li r3, 112
-; FAST-NEXT:    stvx v20, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    std r0, 320(r1)
+; FAST-NEXT:    stxvd2x v20, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 128
-; FAST-NEXT:    stvx v21, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v21, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 144
 ; FAST-NEXT:    vmr v21, v4
-; FAST-NEXT:    stvx v22, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v22, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 160
 ; FAST-NEXT:    vmr v22, v6
-; FAST-NEXT:    stvx v23, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v23, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 176
 ; FAST-NEXT:    vmr v23, v8
-; FAST-NEXT:    stvx v24, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v24, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 192
 ; FAST-NEXT:    vmr v24, v9
-; FAST-NEXT:    stvx v25, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v25, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 208
 ; FAST-NEXT:    vmr v25, v7
-; FAST-NEXT:    stvx v26, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v26, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 224
 ; FAST-NEXT:    vmr v26, v10
-; FAST-NEXT:    stvx v27, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v27, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 240
 ; FAST-NEXT:    vmr v27, v5
-; FAST-NEXT:    stvx v28, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v28, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 256
 ; FAST-NEXT:    vmr v28, v11
-; FAST-NEXT:    stvx v29, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v29, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 272
 ; FAST-NEXT:    vmr v29, v12
-; FAST-NEXT:    stvx v30, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v30, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 288
 ; FAST-NEXT:    vmr v30, v3
-; FAST-NEXT:    stvx v31, r1, r3 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v31, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    li r3, 64
 ; FAST-NEXT:    stxvd2x v13, r1, r3 # 16-byte Folded Spill
 ; FAST-NEXT:    addi r3, r1, 576
@@ -5961,36 +5491,36 @@ define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) {
 ; FAST-NEXT:    mtfprd f0, r3
 ; FAST-NEXT:    li r3, 288
 ; FAST-NEXT:    vmr v8, v31
-; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 272
 ; FAST-NEXT:    vmr v2, v30
 ; FAST-NEXT:    vmr v7, v29
 ; FAST-NEXT:    vmr v6, v28
 ; FAST-NEXT:    vmr v3, v27
-; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 256
 ; FAST-NEXT:    vmr v4, v25
 ; FAST-NEXT:    vmr v5, v24
-; FAST-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 240
-; FAST-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 224
-; FAST-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 208
 ; FAST-NEXT:    xxmrghd v9, vs0, v26
-; FAST-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 192
-; FAST-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 176
-; FAST-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 160
-; FAST-NEXT:    lvx v23, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v23, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 144
-; FAST-NEXT:    lvx v22, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v22, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 128
-; FAST-NEXT:    lvx v21, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v21, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 112
-; FAST-NEXT:    lvx v20, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v20, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    addi r1, r1, 304
 ; FAST-NEXT:    ld r0, 16(r1)
 ; FAST-NEXT:    mtlr r0
@@ -6000,27 +5530,12 @@ define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) {
 }
 declare <16 x i64> @llvm.lrint.v16i64.v16f128(<16 x fp128>)
 
-define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) {
+define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) nounwind {
 ; BE-LABEL: lrint_v32i64_v32f128:
 ; BE:       # %bb.0:
 ; BE-NEXT:    mflr r0
 ; BE-NEXT:    stdu r1, -896(r1)
 ; BE-NEXT:    std r0, 912(r1)
-; BE-NEXT:    .cfi_def_cfa_offset 896
-; BE-NEXT:    .cfi_offset lr, 16
-; BE-NEXT:    .cfi_offset r30, -16
-; BE-NEXT:    .cfi_offset v20, -208
-; BE-NEXT:    .cfi_offset v21, -192
-; BE-NEXT:    .cfi_offset v22, -176
-; BE-NEXT:    .cfi_offset v23, -160
-; BE-NEXT:    .cfi_offset v24, -144
-; BE-NEXT:    .cfi_offset v25, -128
-; BE-NEXT:    .cfi_offset v26, -112
-; BE-NEXT:    .cfi_offset v27, -96
-; BE-NEXT:    .cfi_offset v28, -80
-; BE-NEXT:    .cfi_offset v29, -64
-; BE-NEXT:    .cfi_offset v30, -48
-; BE-NEXT:    .cfi_offset v31, -32
 ; BE-NEXT:    std r30, 880(r1) # 8-byte Folded Spill
 ; BE-NEXT:    mr r30, r3
 ; BE-NEXT:    addi r3, r1, 1440
@@ -6370,63 +5885,48 @@ define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    stdu r1, -640(r1)
-; CHECK-NEXT:    std r0, 656(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 640
-; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset r30, -16
-; CHECK-NEXT:    .cfi_offset v20, -208
-; CHECK-NEXT:    .cfi_offset v21, -192
-; CHECK-NEXT:    .cfi_offset v22, -176
-; CHECK-NEXT:    .cfi_offset v23, -160
-; CHECK-NEXT:    .cfi_offset v24, -144
-; CHECK-NEXT:    .cfi_offset v25, -128
-; CHECK-NEXT:    .cfi_offset v26, -112
-; CHECK-NEXT:    .cfi_offset v27, -96
-; CHECK-NEXT:    .cfi_offset v28, -80
-; CHECK-NEXT:    .cfi_offset v29, -64
-; CHECK-NEXT:    .cfi_offset v30, -48
-; CHECK-NEXT:    .cfi_offset v31, -32
 ; CHECK-NEXT:    li r4, 432
+; CHECK-NEXT:    std r0, 656(r1)
 ; CHECK-NEXT:    std r30, 624(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    mr r30, r3
 ; CHECK-NEXT:    addi r3, r1, 1184
-; CHECK-NEXT:    stvx v20, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 448
 ; CHECK-NEXT:    lxvd2x vs0, 0, r3
 ; CHECK-NEXT:    addi r3, r1, 1168
-; CHECK-NEXT:    stvx v21, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 464
 ; CHECK-NEXT:    lxvd2x vs1, 0, r3
 ; CHECK-NEXT:    addi r3, r1, 1152
-; CHECK-NEXT:    stvx v22, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 480
 ; CHECK-NEXT:    lxvd2x vs2, 0, r3
 ; CHECK-NEXT:    addi r3, r1, 1136
-; CHECK-NEXT:    stvx v23, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 496
 ; CHECK-NEXT:    lxvd2x vs3, 0, r3
 ; CHECK-NEXT:    addi r3, r1, 1120
-; CHECK-NEXT:    stvx v24, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 512
 ; CHECK-NEXT:    lxvd2x vs4, 0, r3
 ; CHECK-NEXT:    addi r3, r1, 1104
 ; CHECK-NEXT:    vmr v24, v3
-; CHECK-NEXT:    stvx v25, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 528
 ; CHECK-NEXT:    lxvd2x vs5, 0, r3
-; CHECK-NEXT:    xxswapd vs0, vs0
 ; CHECK-NEXT:    addi r3, r1, 1088
-; CHECK-NEXT:    stvx v26, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 544
-; CHECK-NEXT:    stvx v27, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    xxswapd vs0, vs0
+; CHECK-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 560
-; CHECK-NEXT:    stvx v28, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 576
-; CHECK-NEXT:    stvx v29, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 592
-; CHECK-NEXT:    stvx v30, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 608
-; CHECK-NEXT:    stvx v31, r1, r4 # 16-byte Folded Spill
+; CHECK-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 416
 ; CHECK-NEXT:    stxvd2x v13, r1, r4 # 16-byte Folded Spill
 ; CHECK-NEXT:    li r4, 400
@@ -6740,30 +6240,30 @@ define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) {
 ; CHECK-NEXT:    li r3, 608
 ; CHECK-NEXT:    xxswapd vs4, vs4
 ; CHECK-NEXT:    stxvd2x vs4, 0, r30
-; CHECK-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 592
 ; CHECK-NEXT:    ld r30, 624(r1) # 8-byte Folded Reload
-; CHECK-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 576
-; CHECK-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 560
-; CHECK-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 544
-; CHECK-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 528
-; CHECK-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 512
-; CHECK-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 496
-; CHECK-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 480
-; CHECK-NEXT:    lvx v23, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v23, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 464
-; CHECK-NEXT:    lvx v22, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v22, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 448
-; CHECK-NEXT:    lvx v21, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v21, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    li r3, 432
-; CHECK-NEXT:    lvx v20, r1, r3 # 16-byte Folded Reload
+; CHECK-NEXT:    lxvd2x v20, r1, r3 # 16-byte Folded Reload
 ; CHECK-NEXT:    addi r1, r1, 640
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
@@ -6773,63 +6273,48 @@ define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) {
 ; FAST:       # %bb.0:
 ; FAST-NEXT:    mflr r0
 ; FAST-NEXT:    stdu r1, -640(r1)
-; FAST-NEXT:    std r0, 656(r1)
-; FAST-NEXT:    .cfi_def_cfa_offset 640
-; FAST-NEXT:    .cfi_offset lr, 16
-; FAST-NEXT:    .cfi_offset r30, -16
-; FAST-NEXT:    .cfi_offset v20, -208
-; FAST-NEXT:    .cfi_offset v21, -192
-; FAST-NEXT:    .cfi_offset v22, -176
-; FAST-NEXT:    .cfi_offset v23, -160
-; FAST-NEXT:    .cfi_offset v24, -144
-; FAST-NEXT:    .cfi_offset v25, -128
-; FAST-NEXT:    .cfi_offset v26, -112
-; FAST-NEXT:    .cfi_offset v27, -96
-; FAST-NEXT:    .cfi_offset v28, -80
-; FAST-NEXT:    .cfi_offset v29, -64
-; FAST-NEXT:    .cfi_offset v30, -48
-; FAST-NEXT:    .cfi_offset v31, -32
 ; FAST-NEXT:    li r4, 432
+; FAST-NEXT:    std r0, 656(r1)
 ; FAST-NEXT:    std r30, 624(r1) # 8-byte Folded Spill
 ; FAST-NEXT:    mr r30, r3
 ; FAST-NEXT:    addi r3, r1, 1184
-; FAST-NEXT:    stvx v20, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v20, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 448
 ; FAST-NEXT:    lxvd2x vs0, 0, r3
 ; FAST-NEXT:    addi r3, r1, 1168
-; FAST-NEXT:    stvx v21, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v21, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 464
 ; FAST-NEXT:    lxvd2x vs1, 0, r3
 ; FAST-NEXT:    addi r3, r1, 1152
-; FAST-NEXT:    stvx v22, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v22, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 480
 ; FAST-NEXT:    lxvd2x vs2, 0, r3
 ; FAST-NEXT:    addi r3, r1, 1136
-; FAST-NEXT:    stvx v23, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v23, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 496
 ; FAST-NEXT:    lxvd2x vs3, 0, r3
 ; FAST-NEXT:    addi r3, r1, 1120
-; FAST-NEXT:    stvx v24, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v24, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 512
 ; FAST-NEXT:    lxvd2x vs4, 0, r3
 ; FAST-NEXT:    addi r3, r1, 1104
 ; FAST-NEXT:    vmr v24, v3
-; FAST-NEXT:    stvx v25, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v25, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 528
 ; FAST-NEXT:    lxvd2x vs5, 0, r3
-; FAST-NEXT:    xxswapd vs0, vs0
 ; FAST-NEXT:    addi r3, r1, 1088
-; FAST-NEXT:    stvx v26, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v26, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 544
-; FAST-NEXT:    stvx v27, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    xxswapd vs0, vs0
+; FAST-NEXT:    stxvd2x v27, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 560
-; FAST-NEXT:    stvx v28, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v28, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 576
-; FAST-NEXT:    stvx v29, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v29, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 592
-; FAST-NEXT:    stvx v30, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v30, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 608
-; FAST-NEXT:    stvx v31, r1, r4 # 16-byte Folded Spill
+; FAST-NEXT:    stxvd2x v31, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 416
 ; FAST-NEXT:    stxvd2x v13, r1, r4 # 16-byte Folded Spill
 ; FAST-NEXT:    li r4, 400
@@ -7143,30 +6628,30 @@ define <32 x i64> @lrint_v32i64_v32f128(<32 x fp128> %x) {
 ; FAST-NEXT:    li r3, 608
 ; FAST-NEXT:    xxswapd vs4, vs4
 ; FAST-NEXT:    stxvd2x vs4, 0, r30
-; FAST-NEXT:    lvx v31, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v31, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 592
 ; FAST-NEXT:    ld r30, 624(r1) # 8-byte Folded Reload
-; FAST-NEXT:    lvx v30, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v30, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 576
-; FAST-NEXT:    lvx v29, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v29, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 560
-; FAST-NEXT:    lvx v28, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v28, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 544
-; FAST-NEXT:    lvx v27, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v27, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 528
-; FAST-NEXT:    lvx v26, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v26, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 512
-; FAST-NEXT:    lvx v25, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v25, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 496
-; FAST-NEXT:    lvx v24, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v24, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 480
-; FAST-NEXT:    lvx v23, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v23, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 464
-; FAST-NEXT:    lvx v22, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v22, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 448
-; FAST-NEXT:    lvx v21, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v21, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    li r3, 432
-; FAST-NEXT:    lvx v20, r1, r3 # 16-byte Folded Reload
+; FAST-NEXT:    lxvd2x v20, r1, r3 # 16-byte Folded Reload
 ; FAST-NEXT:    addi r1, r1, 640
 ; FAST-NEXT:    ld r0, 16(r1)
 ; FAST-NEXT:    mtlr r0
diff --git a/llvm/test/CodeGen/X86/vector-llrint-f16.ll b/llvm/test/CodeGen/X86/vector-llrint-f16.ll
index d6a21e1c00502..eb7be61b719f2 100644
--- a/llvm/test/CodeGen/X86/vector-llrint-f16.ll
+++ b/llvm/test/CodeGen/X86/vector-llrint-f16.ll
@@ -7,7 +7,7 @@
 ; RUN: sed 's/XRINT/lrint/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefix=FP16
 ; RUN: sed 's/XRINT/llrint/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefix=FP16
 
-define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
+define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind {
 ; AVX-LABEL: llrint_v1i64_v1f16:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcvtph2ps %xmm0, %xmm0
@@ -25,7 +25,7 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
   ret <1 x i64> %a
 }
 
-define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) {
+define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) nounwind {
 ; AVX-LABEL: llrint_v2i64_v2f16:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcvtph2ps %xmm0, %xmm1
@@ -52,7 +52,7 @@ define <2 x i64> @llrint_v2i64_v2f16(<2 x half> %x) {
   ret <2 x i64> %a
 }
 
-define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
+define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind {
 ; AVX-LABEL: llrint_v4i64_v4f16:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm1
@@ -95,7 +95,7 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
   ret <4 x i64> %a
 }
 
-define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
+define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind {
 ; AVX-LABEL: llrint_v8i64_v8f16:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm1
@@ -170,7 +170,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
   ret <8 x i64> %a
 }
 
-define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
+define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind {
 ; AVX-LABEL: llrint_v16i64_v16f16:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vmovdqa %ymm0, %ymm2
@@ -310,7 +310,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
   ret <16 x i64> %a
 }
 
-define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
+define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind {
 ; AVX-LABEL: llrint_v32i64_v32f16:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movq %rdi, %rax
diff --git a/llvm/test/CodeGen/X86/vector-llrint.ll b/llvm/test/CodeGen/X86/vector-llrint.ll
index f393ffd8a0441..6fd1a35505aac 100644
--- a/llvm/test/CodeGen/X86/vector-llrint.ll
+++ b/llvm/test/CodeGen/X86/vector-llrint.ll
@@ -5,14 +5,11 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=AVX512DQ
 
-define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) nounwind  {
 ; X86-LABEL: llrint_v1i64_v1f32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
 ; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
 ; X86-NEXT:    flds 8(%ebp)
@@ -21,7 +18,6 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl %ebp, %esp
 ; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl
 ;
 ; SSE-LABEL: llrint_v1i64_v1f32:
@@ -43,20 +39,15 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
 }
 declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
 
-define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) nounwind {
 ; X86-LABEL: llrint_v2i64_v2f32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
 ; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $16, %esp
-; X86-NEXT:    .cfi_offset %esi, -16
-; X86-NEXT:    .cfi_offset %edi, -12
 ; X86-NEXT:    movl 8(%ebp), %eax
 ; X86-NEXT:    flds 16(%ebp)
 ; X86-NEXT:    flds 12(%ebp)
@@ -74,7 +65,6 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl $4
 ;
 ; SSE-LABEL: llrint_v2i64_v2f32:
@@ -107,22 +97,16 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
 }
 declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
 
-define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) nounwind {
 ; X86-LABEL: llrint_v4i64_v4f32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
 ; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $56, %esp
-; X86-NEXT:    .cfi_offset %esi, -20
-; X86-NEXT:    .cfi_offset %edi, -16
-; X86-NEXT:    .cfi_offset %ebx, -12
 ; X86-NEXT:    movl 8(%ebp), %eax
 ; X86-NEXT:    flds 24(%ebp)
 ; X86-NEXT:    flds 20(%ebp)
@@ -159,7 +143,6 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl $4
 ;
 ; SSE-LABEL: llrint_v4i64_v4f32:
@@ -227,22 +210,16 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
 }
 declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
 
-define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind {
 ; X86-LABEL: llrint_v8i64_v8f32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
 ; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $120, %esp
-; X86-NEXT:    .cfi_offset %esi, -20
-; X86-NEXT:    .cfi_offset %edi, -16
-; X86-NEXT:    .cfi_offset %ebx, -12
 ; X86-NEXT:    flds 12(%ebp)
 ; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X86-NEXT:    flds 16(%ebp)
@@ -319,7 +296,6 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl $4
 ;
 ; SSE-LABEL: llrint_v8i64_v8f32:
@@ -435,22 +411,16 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
 
-define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) nounwind {
 ; X86-LABEL: llrint_v16i64_v16f32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
 ; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $248, %esp
-; X86-NEXT:    .cfi_offset %esi, -20
-; X86-NEXT:    .cfi_offset %edi, -16
-; X86-NEXT:    .cfi_offset %ebx, -12
 ; X86-NEXT:    flds 12(%ebp)
 ; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X86-NEXT:    flds 16(%ebp)
@@ -607,7 +577,6 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl $4
 ;
 ; SSE-LABEL: llrint_v16i64_v16f32:
@@ -825,14 +794,11 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
 }
 declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
 
-define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
+define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind {
 ; X86-LABEL: llrint_v1i64_v1f64:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
 ; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
 ; X86-NEXT:    fldl 8(%ebp)
@@ -841,7 +807,6 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl %ebp, %esp
 ; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl
 ;
 ; SSE-LABEL: llrint_v1i64_v1f64:
@@ -863,20 +828,15 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
 }
 declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)
 
-define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
+define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) nounwind {
 ; X86-LABEL: llrint_v2i64_v2f64:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
 ; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $16, %esp
-; X86-NEXT:    .cfi_offset %esi, -16
-; X86-NEXT:    .cfi_offset %edi, -12
 ; X86-NEXT:    movl 8(%ebp), %eax
 ; X86-NEXT:    fldl 20(%ebp)
 ; X86-NEXT:    fldl 12(%ebp)
@@ -894,7 +854,6 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl $4
 ;
 ; SSE-LABEL: llrint_v2i64_v2f64:
@@ -927,22 +886,16 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
 }
 declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)
 
-define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
+define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind {
 ; X86-LABEL: llrint_v4i64_v4f64:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
 ; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $56, %esp
-; X86-NEXT:    .cfi_offset %esi, -20
-; X86-NEXT:    .cfi_offset %edi, -16
-; X86-NEXT:    .cfi_offset %ebx, -12
 ; X86-NEXT:    movl 8(%ebp), %eax
 ; X86-NEXT:    fldl 36(%ebp)
 ; X86-NEXT:    fldl 28(%ebp)
@@ -979,7 +932,6 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl $4
 ;
 ; SSE-LABEL: llrint_v4i64_v4f64:
@@ -1045,22 +997,16 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
 }
 declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
 
-define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
+define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind {
 ; X86-LABEL: llrint_v8i64_v8f64:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
 ; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $120, %esp
-; X86-NEXT:    .cfi_offset %esi, -20
-; X86-NEXT:    .cfi_offset %edi, -16
-; X86-NEXT:    .cfi_offset %ebx, -12
 ; X86-NEXT:    fldl 12(%ebp)
 ; X86-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X86-NEXT:    fldl 20(%ebp)
@@ -1137,7 +1083,6 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl $4
 ;
 ; SSE-LABEL: llrint_v8i64_v8f64:
@@ -1247,14 +1192,11 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
 
-define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
+define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) nounwind {
 ; X86-LABEL: llrint_v1i64_v1f128:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
 ; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    andl $-16, %esp
 ; X86-NEXT:    subl $16, %esp
 ; X86-NEXT:    pushl 20(%ebp)
@@ -1265,56 +1207,43 @@ define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
 ; X86-NEXT:    addl $16, %esp
 ; X86-NEXT:    movl %ebp, %esp
 ; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl
 ;
 ; SSE-LABEL: llrint_v1i64_v1f128:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pushq %rax
-; SSE-NEXT:    .cfi_def_cfa_offset 16
 ; SSE-NEXT:    callq llrintl at PLT
 ; SSE-NEXT:    popq %rcx
-; SSE-NEXT:    .cfi_def_cfa_offset 8
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: llrint_v1i64_v1f128:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    pushq %rax
-; AVX-NEXT:    .cfi_def_cfa_offset 16
 ; AVX-NEXT:    callq llrintl at PLT
 ; AVX-NEXT:    popq %rcx
-; AVX-NEXT:    .cfi_def_cfa_offset 8
 ; AVX-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: llrint_v1i64_v1f128:
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    pushq %rax
-; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512DQ-NEXT:    callq llrintl at PLT
 ; AVX512DQ-NEXT:    popq %rcx
-; AVX512DQ-NEXT:    .cfi_def_cfa_offset 8
 ; AVX512DQ-NEXT:    retq
   %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x)
   ret <1 x i64> %a
 }
 declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>)
 
-define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
+define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) nounwind {
 ; X86-LABEL: llrint_v2i64_v2f128:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
 ; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-16, %esp
 ; X86-NEXT:    subl $16, %esp
-; X86-NEXT:    .cfi_offset %esi, -20
-; X86-NEXT:    .cfi_offset %edi, -16
-; X86-NEXT:    .cfi_offset %ebx, -12
 ; X86-NEXT:    movl 8(%ebp), %esi
 ; X86-NEXT:    pushl 24(%ebp)
 ; X86-NEXT:    pushl 20(%ebp)
@@ -1340,13 +1269,11 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl $4
 ;
 ; SSE-LABEL: llrint_v2i64_v2f128:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    subq $40, %rsp
-; SSE-NEXT:    .cfi_def_cfa_offset 48
 ; SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; SSE-NEXT:    movaps %xmm1, %xmm0
 ; SSE-NEXT:    callq llrintl at PLT
@@ -1358,13 +1285,11 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
 ; SSE-NEXT:    punpcklqdq (%rsp), %xmm0 # 16-byte Folded Reload
 ; SSE-NEXT:    # xmm0 = xmm0[0],mem[0]
 ; SSE-NEXT:    addq $40, %rsp
-; SSE-NEXT:    .cfi_def_cfa_offset 8
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: llrint_v2i64_v2f128:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    subq $40, %rsp
-; AVX-NEXT:    .cfi_def_cfa_offset 48
 ; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX-NEXT:    vmovaps %xmm1, %xmm0
 ; AVX-NEXT:    callq llrintl at PLT
@@ -1376,13 +1301,11 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
 ; AVX-NEXT:    vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
 ; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
 ; AVX-NEXT:    addq $40, %rsp
-; AVX-NEXT:    .cfi_def_cfa_offset 8
 ; AVX-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: llrint_v2i64_v2f128:
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    subq $40, %rsp
-; AVX512DQ-NEXT:    .cfi_def_cfa_offset 48
 ; AVX512DQ-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX512DQ-NEXT:    vmovaps %xmm1, %xmm0
 ; AVX512DQ-NEXT:    callq llrintl at PLT
@@ -1394,29 +1317,22 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
 ; AVX512DQ-NEXT:    vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
 ; AVX512DQ-NEXT:    # xmm0 = xmm0[0],mem[0]
 ; AVX512DQ-NEXT:    addq $40, %rsp
-; AVX512DQ-NEXT:    .cfi_def_cfa_offset 8
 ; AVX512DQ-NEXT:    retq
   %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x)
   ret <2 x i64> %a
 }
 declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>)
 
-define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
+define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) nounwind {
 ; X86-LABEL: llrint_v4i64_v4f128:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
 ; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-16, %esp
 ; X86-NEXT:    subl $32, %esp
-; X86-NEXT:    .cfi_offset %esi, -20
-; X86-NEXT:    .cfi_offset %edi, -16
-; X86-NEXT:    .cfi_offset %ebx, -12
 ; X86-NEXT:    movl 8(%ebp), %esi
 ; X86-NEXT:    movl 36(%ebp), %edi
 ; X86-NEXT:    movl 40(%ebp), %ebx
@@ -1468,13 +1384,11 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl $4
 ;
 ; SSE-LABEL: llrint_v4i64_v4f128:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    subq $72, %rsp
-; SSE-NEXT:    .cfi_def_cfa_offset 80
 ; SSE-NEXT:    movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; SSE-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; SSE-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
@@ -1499,13 +1413,11 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
 ; SSE-NEXT:    # xmm1 = xmm1[0],mem[0]
 ; SSE-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
 ; SSE-NEXT:    addq $72, %rsp
-; SSE-NEXT:    .cfi_def_cfa_offset 8
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: llrint_v4i64_v4f128:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    subq $72, %rsp
-; AVX1-NEXT:    .cfi_def_cfa_offset 80
 ; AVX1-NEXT:    vmovaps %xmm2, (%rsp) # 16-byte Spill
 ; AVX1-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX1-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1530,13 +1442,11 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
 ; AVX1-NEXT:    # xmm0 = xmm0[0],mem[0]
 ; AVX1-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
 ; AVX1-NEXT:    addq $72, %rsp
-; AVX1-NEXT:    .cfi_def_cfa_offset 8
 ; AVX1-NEXT:    retq
 ;
 ; AVX512-LABEL: llrint_v4i64_v4f128:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    subq $72, %rsp
-; AVX512-NEXT:    .cfi_def_cfa_offset 80
 ; AVX512-NEXT:    vmovaps %xmm2, (%rsp) # 16-byte Spill
 ; AVX512-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX512-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1561,13 +1471,11 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
 ; AVX512-NEXT:    # xmm0 = xmm0[0],mem[0]
 ; AVX512-NEXT:    vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
 ; AVX512-NEXT:    addq $72, %rsp
-; AVX512-NEXT:    .cfi_def_cfa_offset 8
 ; AVX512-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: llrint_v4i64_v4f128:
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    subq $72, %rsp
-; AVX512DQ-NEXT:    .cfi_def_cfa_offset 80
 ; AVX512DQ-NEXT:    vmovaps %xmm2, (%rsp) # 16-byte Spill
 ; AVX512DQ-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX512DQ-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1592,29 +1500,22 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
 ; AVX512DQ-NEXT:    # xmm0 = xmm0[0],mem[0]
 ; AVX512DQ-NEXT:    vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
 ; AVX512DQ-NEXT:    addq $72, %rsp
-; AVX512DQ-NEXT:    .cfi_def_cfa_offset 8
 ; AVX512DQ-NEXT:    retq
   %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x)
   ret <4 x i64> %a
 }
 declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>)
 
-define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
+define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) nounwind {
 ; X86-LABEL: llrint_v8i64_v8f128:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
 ; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-16, %esp
 ; X86-NEXT:    subl $64, %esp
-; X86-NEXT:    .cfi_offset %esi, -20
-; X86-NEXT:    .cfi_offset %edi, -16
-; X86-NEXT:    .cfi_offset %ebx, -12
 ; X86-NEXT:    movl 8(%ebp), %esi
 ; X86-NEXT:    movl 36(%ebp), %edi
 ; X86-NEXT:    movl 40(%ebp), %ebx
@@ -1714,13 +1615,11 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl $4
 ;
 ; SSE-LABEL: llrint_v8i64_v8f128:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    subq $136, %rsp
-; SSE-NEXT:    .cfi_def_cfa_offset 144
 ; SSE-NEXT:    movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; SSE-NEXT:    movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; SSE-NEXT:    movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1771,13 +1670,11 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
 ; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
 ; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
 ; SSE-NEXT:    addq $136, %rsp
-; SSE-NEXT:    .cfi_def_cfa_offset 8
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: llrint_v8i64_v8f128:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    subq $152, %rsp
-; AVX1-NEXT:    .cfi_def_cfa_offset 160
 ; AVX1-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX1-NEXT:    vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX1-NEXT:    vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1829,13 +1726,11 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
 ; AVX1-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm1 # 16-byte Folded Reload
 ; AVX1-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
 ; AVX1-NEXT:    addq $152, %rsp
-; AVX1-NEXT:    .cfi_def_cfa_offset 8
 ; AVX1-NEXT:    retq
 ;
 ; AVX512-LABEL: llrint_v8i64_v8f128:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    subq $152, %rsp
-; AVX512-NEXT:    .cfi_def_cfa_offset 160
 ; AVX512-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
 ; AVX512-NEXT:    vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX512-NEXT:    vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1887,13 +1782,11 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
 ; AVX512-NEXT:    vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
 ; AVX512-NEXT:    vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
 ; AVX512-NEXT:    addq $152, %rsp
-; AVX512-NEXT:    .cfi_def_cfa_offset 8
 ; AVX512-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: llrint_v8i64_v8f128:
 ; AVX512DQ:       # %bb.0:
 ; AVX512DQ-NEXT:    subq $152, %rsp
-; AVX512DQ-NEXT:    .cfi_def_cfa_offset 160
 ; AVX512DQ-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
 ; AVX512DQ-NEXT:    vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX512DQ-NEXT:    vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1945,7 +1838,6 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
 ; AVX512DQ-NEXT:    vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
 ; AVX512DQ-NEXT:    vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
 ; AVX512DQ-NEXT:    addq $152, %rsp
-; AVX512DQ-NEXT:    .cfi_def_cfa_offset 8
 ; AVX512DQ-NEXT:    retq
   %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x)
   ret <8 x i64> %a
diff --git a/llvm/test/CodeGen/X86/vector-lrint-f16.ll b/llvm/test/CodeGen/X86/vector-lrint-f16.ll
index 1316f808aa27e..fa3aeb09eae6f 100644
--- a/llvm/test/CodeGen/X86/vector-lrint-f16.ll
+++ b/llvm/test/CodeGen/X86/vector-lrint-f16.ll
@@ -8,7 +8,7 @@
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx2,f16c | FileCheck %s --check-prefixes=X64-AVX-I32
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=X64-FP16-I32
 
-define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+define <1 x iXLen> @lrint_v1f16(<1 x half> %x) nounwind {
 ; X86-AVX-I16-LABEL: lrint_v1f16:
 ; X86-AVX-I16:       # %bb.0:
 ; X86-AVX-I16-NEXT:    vcvtph2ps %xmm0, %xmm0
@@ -73,7 +73,7 @@ define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
 }
 declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
 
-define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+define <2 x iXLen> @lrint_v2f16(<2 x half> %x) nounwind {
 ; X86-AVX-I16-LABEL: lrint_v2f16:
 ; X86-AVX-I16:       # %bb.0:
 ; X86-AVX-I16-NEXT:    vpsrld $16, %xmm0, %xmm1
@@ -250,7 +250,7 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 }
 declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
 
-define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+define <4 x iXLen> @lrint_v4f16(<4 x half> %x) nounwind {
 ; X86-AVX-I16-LABEL: lrint_v4f16:
 ; X86-AVX-I16:       # %bb.0:
 ; X86-AVX-I16-NEXT:    vpsrld $16, %xmm0, %xmm1
@@ -455,7 +455,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
 }
 declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>)
 
-define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
+define <8 x iXLen> @lrint_v8f16(<8 x half> %x) nounwind {
 ; X86-AVX-I16-LABEL: lrint_v8f16:
 ; X86-AVX-I16:       # %bb.0:
 ; X86-AVX-I16-NEXT:    vpsrld $16, %xmm0, %xmm1
@@ -718,7 +718,7 @@ define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
 }
 declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>)
 
-define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
+define <16 x iXLen> @lrint_v16f16(<16 x half> %x) nounwind {
 ; X86-AVX-I16-LABEL: lrint_v16f16:
 ; X86-AVX-I16:       # %bb.0:
 ; X86-AVX-I16-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -1211,7 +1211,7 @@ define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
 }
 declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
 
-define <32 x iXLen> @lrint_v32f32(<32 x half> %x) {
+define <32 x iXLen> @lrint_v32f32(<32 x half> %x) nounwind {
 ; X86-AVX-I16-LABEL: lrint_v32f32:
 ; X86-AVX-I16:       # %bb.0:
 ; X86-AVX-I16-NEXT:    vextracti128 $1, %ymm0, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll
index 8900e94c50305..b3e5a0929b7a5 100644
--- a/llvm/test/CodeGen/X86/vector-lrint.ll
+++ b/llvm/test/CodeGen/X86/vector-lrint.ll
@@ -12,25 +12,20 @@
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX-i64,AVX512-i64
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=X64-AVX-i64,AVX512DQ-i64
 
-define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
+define <1 x iXLen> @lrint_v1f32(<1 x float> %x) nounwind {
 ; X86-I32-LABEL: lrint_v1f32:
 ; X86-I32:       # %bb.0:
 ; X86-I32-NEXT:    pushl %eax
-; X86-I32-NEXT:    .cfi_def_cfa_offset 8
 ; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
 ; X86-I32-NEXT:    fistpl (%esp)
 ; X86-I32-NEXT:    movl (%esp), %eax
 ; X86-I32-NEXT:    popl %ecx
-; X86-I32-NEXT:    .cfi_def_cfa_offset 4
 ; X86-I32-NEXT:    retl
 ;
 ; X86-I64-LABEL: lrint_v1f32:
 ; X86-I64:       # %bb.0:
 ; X86-I64-NEXT:    pushl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_offset 8
-; X86-I64-NEXT:    .cfi_offset %ebp, -8
 ; X86-I64-NEXT:    movl %esp, %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I64-NEXT:    andl $-8, %esp
 ; X86-I64-NEXT:    subl $8, %esp
 ; X86-I64-NEXT:    flds 8(%ebp)
@@ -39,7 +34,6 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 ; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-I64-NEXT:    movl %ebp, %esp
 ; X86-I64-NEXT:    popl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I64-NEXT:    retl
 ;
 ; X86-SSE2-LABEL: lrint_v1f32:
@@ -66,11 +60,10 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 }
 declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>)
 
-define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
+define <2 x iXLen> @lrint_v2f32(<2 x float> %x) nounwind {
 ; X86-I32-LABEL: lrint_v2f32:
 ; X86-I32:       # %bb.0:
 ; X86-I32-NEXT:    subl $8, %esp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 12
 ; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
 ; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
 ; X86-I32-NEXT:    fistpl (%esp)
@@ -78,22 +71,16 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
 ; X86-I32-NEXT:    movl (%esp), %eax
 ; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-I32-NEXT:    addl $8, %esp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 4
 ; X86-I32-NEXT:    retl
 ;
 ; X86-I64-LABEL: lrint_v2f32:
 ; X86-I64:       # %bb.0:
 ; X86-I64-NEXT:    pushl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_offset 8
-; X86-I64-NEXT:    .cfi_offset %ebp, -8
 ; X86-I64-NEXT:    movl %esp, %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I64-NEXT:    pushl %edi
 ; X86-I64-NEXT:    pushl %esi
 ; X86-I64-NEXT:    andl $-8, %esp
 ; X86-I64-NEXT:    subl $16, %esp
-; X86-I64-NEXT:    .cfi_offset %esi, -16
-; X86-I64-NEXT:    .cfi_offset %edi, -12
 ; X86-I64-NEXT:    movl 8(%ebp), %eax
 ; X86-I64-NEXT:    flds 16(%ebp)
 ; X86-I64-NEXT:    flds 12(%ebp)
@@ -111,7 +98,6 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
 ; X86-I64-NEXT:    popl %esi
 ; X86-I64-NEXT:    popl %edi
 ; X86-I64-NEXT:    popl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I64-NEXT:    retl $4
 ;
 ; X86-SSE2-LABEL: lrint_v2f32:
@@ -158,17 +144,12 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
 }
 declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>)
 
-define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
+define <4 x iXLen> @lrint_v4f32(<4 x float> %x) nounwind {
 ; X86-I32-LABEL: lrint_v4f32:
 ; X86-I32:       # %bb.0:
 ; X86-I32-NEXT:    pushl %edi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 8
 ; X86-I32-NEXT:    pushl %esi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 12
 ; X86-I32-NEXT:    subl $16, %esp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 28
-; X86-I32-NEXT:    .cfi_offset %esi, -12
-; X86-I32-NEXT:    .cfi_offset %edi, -8
 ; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
 ; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
@@ -187,28 +168,19 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
 ; X86-I32-NEXT:    movl %edx, 4(%eax)
 ; X86-I32-NEXT:    movl %ecx, (%eax)
 ; X86-I32-NEXT:    addl $16, %esp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 12
 ; X86-I32-NEXT:    popl %esi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 8
 ; X86-I32-NEXT:    popl %edi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 4
 ; X86-I32-NEXT:    retl $4
 ;
 ; X86-I64-LABEL: lrint_v4f32:
 ; X86-I64:       # %bb.0:
 ; X86-I64-NEXT:    pushl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_offset 8
-; X86-I64-NEXT:    .cfi_offset %ebp, -8
 ; X86-I64-NEXT:    movl %esp, %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I64-NEXT:    pushl %ebx
 ; X86-I64-NEXT:    pushl %edi
 ; X86-I64-NEXT:    pushl %esi
 ; X86-I64-NEXT:    andl $-8, %esp
 ; X86-I64-NEXT:    subl $56, %esp
-; X86-I64-NEXT:    .cfi_offset %esi, -20
-; X86-I64-NEXT:    .cfi_offset %edi, -16
-; X86-I64-NEXT:    .cfi_offset %ebx, -12
 ; X86-I64-NEXT:    movl 8(%ebp), %eax
 ; X86-I64-NEXT:    flds 24(%ebp)
 ; X86-I64-NEXT:    flds 20(%ebp)
@@ -245,7 +217,6 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
 ; X86-I64-NEXT:    popl %edi
 ; X86-I64-NEXT:    popl %ebx
 ; X86-I64-NEXT:    popl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I64-NEXT:    retl $4
 ;
 ; X86-SSE2-LABEL: lrint_v4f32:
@@ -308,23 +279,14 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
 }
 declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>)
 
-define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
+define <8 x iXLen> @lrint_v8f32(<8 x float> %x) nounwind {
 ; X86-I32-LABEL: lrint_v8f32:
 ; X86-I32:       # %bb.0:
 ; X86-I32-NEXT:    pushl %ebp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 8
 ; X86-I32-NEXT:    pushl %ebx
-; X86-I32-NEXT:    .cfi_def_cfa_offset 12
 ; X86-I32-NEXT:    pushl %edi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 16
 ; X86-I32-NEXT:    pushl %esi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 20
 ; X86-I32-NEXT:    subl $40, %esp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 60
-; X86-I32-NEXT:    .cfi_offset %esi, -20
-; X86-I32-NEXT:    .cfi_offset %edi, -16
-; X86-I32-NEXT:    .cfi_offset %ebx, -12
-; X86-I32-NEXT:    .cfi_offset %ebp, -8
 ; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
 ; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
 ; X86-I32-NEXT:    flds {{[0-9]+}}(%esp)
@@ -363,32 +325,21 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
 ; X86-I32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-I32-NEXT:    movl %ecx, (%eax)
 ; X86-I32-NEXT:    addl $40, %esp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 20
 ; X86-I32-NEXT:    popl %esi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 16
 ; X86-I32-NEXT:    popl %edi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 12
 ; X86-I32-NEXT:    popl %ebx
-; X86-I32-NEXT:    .cfi_def_cfa_offset 8
 ; X86-I32-NEXT:    popl %ebp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 4
 ; X86-I32-NEXT:    retl $4
 ;
 ; X86-I64-LABEL: lrint_v8f32:
 ; X86-I64:       # %bb.0:
 ; X86-I64-NEXT:    pushl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_offset 8
-; X86-I64-NEXT:    .cfi_offset %ebp, -8
 ; X86-I64-NEXT:    movl %esp, %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I64-NEXT:    pushl %ebx
 ; X86-I64-NEXT:    pushl %edi
 ; X86-I64-NEXT:    pushl %esi
 ; X86-I64-NEXT:    andl $-8, %esp
 ; X86-I64-NEXT:    subl $120, %esp
-; X86-I64-NEXT:    .cfi_offset %esi, -20
-; X86-I64-NEXT:    .cfi_offset %edi, -16
-; X86-I64-NEXT:    .cfi_offset %ebx, -12
 ; X86-I64-NEXT:    flds 12(%ebp)
 ; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X86-I64-NEXT:    flds 16(%ebp)
@@ -465,7 +416,6 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
 ; X86-I64-NEXT:    popl %edi
 ; X86-I64-NEXT:    popl %ebx
 ; X86-I64-NEXT:    popl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I64-NEXT:    retl $4
 ;
 ; X86-SSE2-LABEL: lrint_v8f32:
@@ -561,31 +511,26 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
 }
 declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>)
 
-define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) {
+define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) nounwind {
   %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
   ret <16 x iXLen> %a
 }
 declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
 
-define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
+define <1 x iXLen> @lrint_v1f64(<1 x double> %x) nounwind {
 ; X86-I32-LABEL: lrint_v1f64:
 ; X86-I32:       # %bb.0:
 ; X86-I32-NEXT:    pushl %eax
-; X86-I32-NEXT:    .cfi_def_cfa_offset 8
 ; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X86-I32-NEXT:    fistpl (%esp)
 ; X86-I32-NEXT:    movl (%esp), %eax
 ; X86-I32-NEXT:    popl %ecx
-; X86-I32-NEXT:    .cfi_def_cfa_offset 4
 ; X86-I32-NEXT:    retl
 ;
 ; X86-I64-LABEL: lrint_v1f64:
 ; X86-I64:       # %bb.0:
 ; X86-I64-NEXT:    pushl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_offset 8
-; X86-I64-NEXT:    .cfi_offset %ebp, -8
 ; X86-I64-NEXT:    movl %esp, %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I64-NEXT:    andl $-8, %esp
 ; X86-I64-NEXT:    subl $8, %esp
 ; X86-I64-NEXT:    fldl 8(%ebp)
@@ -594,7 +539,6 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
 ; X86-I64-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-I64-NEXT:    movl %ebp, %esp
 ; X86-I64-NEXT:    popl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I64-NEXT:    retl
 ;
 ; X86-SSE2-LABEL: lrint_v1f64:
@@ -621,11 +565,10 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
 }
 declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
 
-define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
+define <2 x iXLen> @lrint_v2f64(<2 x double> %x) nounwind {
 ; X86-I32-LABEL: lrint_v2f64:
 ; X86-I32:       # %bb.0:
 ; X86-I32-NEXT:    subl $8, %esp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 12
 ; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X86-I32-NEXT:    fistpl (%esp)
@@ -633,22 +576,16 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
 ; X86-I32-NEXT:    movl (%esp), %eax
 ; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-I32-NEXT:    addl $8, %esp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 4
 ; X86-I32-NEXT:    retl
 ;
 ; X86-I64-LABEL: lrint_v2f64:
 ; X86-I64:       # %bb.0:
 ; X86-I64-NEXT:    pushl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_offset 8
-; X86-I64-NEXT:    .cfi_offset %ebp, -8
 ; X86-I64-NEXT:    movl %esp, %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I64-NEXT:    pushl %edi
 ; X86-I64-NEXT:    pushl %esi
 ; X86-I64-NEXT:    andl $-8, %esp
 ; X86-I64-NEXT:    subl $16, %esp
-; X86-I64-NEXT:    .cfi_offset %esi, -16
-; X86-I64-NEXT:    .cfi_offset %edi, -12
 ; X86-I64-NEXT:    movl 8(%ebp), %eax
 ; X86-I64-NEXT:    fldl 20(%ebp)
 ; X86-I64-NEXT:    fldl 12(%ebp)
@@ -666,7 +603,6 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
 ; X86-I64-NEXT:    popl %esi
 ; X86-I64-NEXT:    popl %edi
 ; X86-I64-NEXT:    popl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I64-NEXT:    retl $4
 ;
 ; X86-SSE2-LABEL: lrint_v2f64:
@@ -713,17 +649,12 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
 }
 declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
 
-define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
+define <4 x iXLen> @lrint_v4f64(<4 x double> %x) nounwind {
 ; X86-I32-LABEL: lrint_v4f64:
 ; X86-I32:       # %bb.0:
 ; X86-I32-NEXT:    pushl %edi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 8
 ; X86-I32-NEXT:    pushl %esi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 12
 ; X86-I32-NEXT:    subl $16, %esp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 28
-; X86-I32-NEXT:    .cfi_offset %esi, -12
-; X86-I32-NEXT:    .cfi_offset %edi, -8
 ; X86-I32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
@@ -742,28 +673,19 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
 ; X86-I32-NEXT:    movl %edx, 4(%eax)
 ; X86-I32-NEXT:    movl %ecx, (%eax)
 ; X86-I32-NEXT:    addl $16, %esp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 12
 ; X86-I32-NEXT:    popl %esi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 8
 ; X86-I32-NEXT:    popl %edi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 4
 ; X86-I32-NEXT:    retl $4
 ;
 ; X86-I64-LABEL: lrint_v4f64:
 ; X86-I64:       # %bb.0:
 ; X86-I64-NEXT:    pushl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_offset 8
-; X86-I64-NEXT:    .cfi_offset %ebp, -8
 ; X86-I64-NEXT:    movl %esp, %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I64-NEXT:    pushl %ebx
 ; X86-I64-NEXT:    pushl %edi
 ; X86-I64-NEXT:    pushl %esi
 ; X86-I64-NEXT:    andl $-8, %esp
 ; X86-I64-NEXT:    subl $56, %esp
-; X86-I64-NEXT:    .cfi_offset %esi, -20
-; X86-I64-NEXT:    .cfi_offset %edi, -16
-; X86-I64-NEXT:    .cfi_offset %ebx, -12
 ; X86-I64-NEXT:    movl 8(%ebp), %eax
 ; X86-I64-NEXT:    fldl 36(%ebp)
 ; X86-I64-NEXT:    fldl 28(%ebp)
@@ -800,7 +722,6 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
 ; X86-I64-NEXT:    popl %edi
 ; X86-I64-NEXT:    popl %ebx
 ; X86-I64-NEXT:    popl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I64-NEXT:    retl $4
 ;
 ; X86-SSE2-LABEL: lrint_v4f64:
@@ -867,23 +788,14 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
 }
 declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)
 
-define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
+define <8 x iXLen> @lrint_v8f64(<8 x double> %x) nounwind {
 ; X86-I32-LABEL: lrint_v8f64:
 ; X86-I32:       # %bb.0:
 ; X86-I32-NEXT:    pushl %ebp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 8
 ; X86-I32-NEXT:    pushl %ebx
-; X86-I32-NEXT:    .cfi_def_cfa_offset 12
 ; X86-I32-NEXT:    pushl %edi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 16
 ; X86-I32-NEXT:    pushl %esi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 20
 ; X86-I32-NEXT:    subl $40, %esp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 60
-; X86-I32-NEXT:    .cfi_offset %esi, -20
-; X86-I32-NEXT:    .cfi_offset %edi, -16
-; X86-I32-NEXT:    .cfi_offset %ebx, -12
-; X86-I32-NEXT:    .cfi_offset %ebp, -8
 ; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X86-I32-NEXT:    fistpl {{[0-9]+}}(%esp)
 ; X86-I32-NEXT:    fldl {{[0-9]+}}(%esp)
@@ -922,32 +834,21 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
 ; X86-I32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-I32-NEXT:    movl %ecx, (%eax)
 ; X86-I32-NEXT:    addl $40, %esp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 20
 ; X86-I32-NEXT:    popl %esi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 16
 ; X86-I32-NEXT:    popl %edi
-; X86-I32-NEXT:    .cfi_def_cfa_offset 12
 ; X86-I32-NEXT:    popl %ebx
-; X86-I32-NEXT:    .cfi_def_cfa_offset 8
 ; X86-I32-NEXT:    popl %ebp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 4
 ; X86-I32-NEXT:    retl $4
 ;
 ; X86-I64-LABEL: lrint_v8f64:
 ; X86-I64:       # %bb.0:
 ; X86-I64-NEXT:    pushl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_offset 8
-; X86-I64-NEXT:    .cfi_offset %ebp, -8
 ; X86-I64-NEXT:    movl %esp, %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I64-NEXT:    pushl %ebx
 ; X86-I64-NEXT:    pushl %edi
 ; X86-I64-NEXT:    pushl %esi
 ; X86-I64-NEXT:    andl $-8, %esp
 ; X86-I64-NEXT:    subl $120, %esp
-; X86-I64-NEXT:    .cfi_offset %esi, -20
-; X86-I64-NEXT:    .cfi_offset %edi, -16
-; X86-I64-NEXT:    .cfi_offset %ebx, -12
 ; X86-I64-NEXT:    fldl 12(%ebp)
 ; X86-I64-NEXT:    fistpll {{[0-9]+}}(%esp)
 ; X86-I64-NEXT:    fldl 20(%ebp)
@@ -1024,16 +925,12 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
 ; X86-I64-NEXT:    popl %edi
 ; X86-I64-NEXT:    popl %ebx
 ; X86-I64-NEXT:    popl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I64-NEXT:    retl $4
 ;
 ; X86-SSE2-LABEL: lrint_v8f64:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    pushl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
 ; X86-SSE2-NEXT:    movl %esp, %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-SSE2-NEXT:    andl $-16, %esp
 ; X86-SSE2-NEXT:    subl $16, %esp
 ; X86-SSE2-NEXT:    cvtpd2dq %xmm1, %xmm1
@@ -1044,7 +941,6 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
 ; X86-SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; X86-SSE2-NEXT:    movl %ebp, %esp
 ; X86-SSE2-NEXT:    popl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-AVX1-LABEL: lrint_v8f64:
@@ -1141,14 +1037,11 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
 }
 declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
 
-define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
+define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) nounwind {
 ; X86-I32-LABEL: lrint_v1fp128:
 ; X86-I32:       # %bb.0:
 ; X86-I32-NEXT:    pushl %ebp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 8
-; X86-I32-NEXT:    .cfi_offset %ebp, -8
 ; X86-I32-NEXT:    movl %esp, %ebp
-; X86-I32-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I32-NEXT:    andl $-16, %esp
 ; X86-I32-NEXT:    subl $16, %esp
 ; X86-I32-NEXT:    pushl 20(%ebp)
@@ -1159,16 +1052,12 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
 ; X86-I32-NEXT:    addl $16, %esp
 ; X86-I32-NEXT:    movl %ebp, %esp
 ; X86-I32-NEXT:    popl %ebp
-; X86-I32-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I32-NEXT:    retl
 ;
 ; X86-I64-LABEL: lrint_v1fp128:
 ; X86-I64:       # %bb.0:
 ; X86-I64-NEXT:    pushl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_offset 8
-; X86-I64-NEXT:    .cfi_offset %ebp, -8
 ; X86-I64-NEXT:    movl %esp, %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I64-NEXT:    andl $-16, %esp
 ; X86-I64-NEXT:    subl $16, %esp
 ; X86-I64-NEXT:    pushl 20(%ebp)
@@ -1179,16 +1068,12 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
 ; X86-I64-NEXT:    addl $16, %esp
 ; X86-I64-NEXT:    movl %ebp, %esp
 ; X86-I64-NEXT:    popl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I64-NEXT:    retl
 ;
 ; X86-SSE2-LABEL: lrint_v1fp128:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    pushl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
 ; X86-SSE2-NEXT:    movl %esp, %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-SSE2-NEXT:    andl $-16, %esp
 ; X86-SSE2-NEXT:    subl $16, %esp
 ; X86-SSE2-NEXT:    pushl 20(%ebp)
@@ -1199,16 +1084,12 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
 ; X86-SSE2-NEXT:    addl $16, %esp
 ; X86-SSE2-NEXT:    movl %ebp, %esp
 ; X86-SSE2-NEXT:    popl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-AVX-LABEL: lrint_v1fp128:
 ; X86-AVX:       # %bb.0:
 ; X86-AVX-NEXT:    pushl %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %ebp, -8
 ; X86-AVX-NEXT:    movl %esp, %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-AVX-NEXT:    andl $-16, %esp
 ; X86-AVX-NEXT:    subl $32, %esp
 ; X86-AVX-NEXT:    vmovups 8(%ebp), %xmm0
@@ -1216,47 +1097,36 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
 ; X86-AVX-NEXT:    calll lrintl
 ; X86-AVX-NEXT:    movl %ebp, %esp
 ; X86-AVX-NEXT:    popl %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-AVX-i32-LABEL: lrint_v1fp128:
 ; X64-AVX-i32:       # %bb.0:
 ; X64-AVX-i32-NEXT:    pushq %rax
-; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 16
 ; X64-AVX-i32-NEXT:    callq lrintl@PLT
 ; X64-AVX-i32-NEXT:    popq %rcx
-; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 8
 ; X64-AVX-i32-NEXT:    retq
 ;
 ; X64-AVX-i64-LABEL: lrint_v1fp128:
 ; X64-AVX-i64:       # %bb.0:
 ; X64-AVX-i64-NEXT:    pushq %rax
-; X64-AVX-i64-NEXT:    .cfi_def_cfa_offset 16
 ; X64-AVX-i64-NEXT:    callq lrintl@PLT
 ; X64-AVX-i64-NEXT:    popq %rcx
-; X64-AVX-i64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-AVX-i64-NEXT:    retq
   %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x)
   ret <1 x iXLen> %a
 }
 declare <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128>)
 
-define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
+define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) nounwind {
 ; X86-I32-LABEL: lrint_v2fp128:
 ; X86-I32:       # %bb.0:
 ; X86-I32-NEXT:    pushl %ebp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 8
-; X86-I32-NEXT:    .cfi_offset %ebp, -8
 ; X86-I32-NEXT:    movl %esp, %ebp
-; X86-I32-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I32-NEXT:    pushl %ebx
 ; X86-I32-NEXT:    pushl %edi
 ; X86-I32-NEXT:    pushl %esi
 ; X86-I32-NEXT:    andl $-16, %esp
 ; X86-I32-NEXT:    subl $16, %esp
-; X86-I32-NEXT:    .cfi_offset %esi, -20
-; X86-I32-NEXT:    .cfi_offset %edi, -16
-; X86-I32-NEXT:    .cfi_offset %ebx, -12
 ; X86-I32-NEXT:    movl 32(%ebp), %edi
 ; X86-I32-NEXT:    movl 36(%ebp), %ebx
 ; X86-I32-NEXT:    pushl 20(%ebp)
@@ -1279,24 +1149,17 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
 ; X86-I32-NEXT:    popl %edi
 ; X86-I32-NEXT:    popl %ebx
 ; X86-I32-NEXT:    popl %ebp
-; X86-I32-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I32-NEXT:    retl
 ;
 ; X86-I64-LABEL: lrint_v2fp128:
 ; X86-I64:       # %bb.0:
 ; X86-I64-NEXT:    pushl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_offset 8
-; X86-I64-NEXT:    .cfi_offset %ebp, -8
 ; X86-I64-NEXT:    movl %esp, %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I64-NEXT:    pushl %ebx
 ; X86-I64-NEXT:    pushl %edi
 ; X86-I64-NEXT:    pushl %esi
 ; X86-I64-NEXT:    andl $-16, %esp
 ; X86-I64-NEXT:    subl $16, %esp
-; X86-I64-NEXT:    .cfi_offset %esi, -20
-; X86-I64-NEXT:    .cfi_offset %edi, -16
-; X86-I64-NEXT:    .cfi_offset %ebx, -12
 ; X86-I64-NEXT:    movl 8(%ebp), %esi
 ; X86-I64-NEXT:    pushl 24(%ebp)
 ; X86-I64-NEXT:    pushl 20(%ebp)
@@ -1322,24 +1185,17 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
 ; X86-I64-NEXT:    popl %edi
 ; X86-I64-NEXT:    popl %ebx
 ; X86-I64-NEXT:    popl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I64-NEXT:    retl $4
 ;
 ; X86-SSE2-LABEL: lrint_v2fp128:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    pushl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
 ; X86-SSE2-NEXT:    movl %esp, %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-SSE2-NEXT:    pushl %ebx
 ; X86-SSE2-NEXT:    pushl %edi
 ; X86-SSE2-NEXT:    pushl %esi
 ; X86-SSE2-NEXT:    andl $-16, %esp
 ; X86-SSE2-NEXT:    subl $32, %esp
-; X86-SSE2-NEXT:    .cfi_offset %esi, -20
-; X86-SSE2-NEXT:    .cfi_offset %edi, -16
-; X86-SSE2-NEXT:    .cfi_offset %ebx, -12
 ; X86-SSE2-NEXT:    movl 12(%ebp), %edi
 ; X86-SSE2-NEXT:    movl 16(%ebp), %ebx
 ; X86-SSE2-NEXT:    movl 20(%ebp), %esi
@@ -1365,16 +1221,12 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
 ; X86-SSE2-NEXT:    popl %edi
 ; X86-SSE2-NEXT:    popl %ebx
 ; X86-SSE2-NEXT:    popl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-AVX-LABEL: lrint_v2fp128:
 ; X86-AVX:       # %bb.0:
 ; X86-AVX-NEXT:    pushl %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %ebp, -8
 ; X86-AVX-NEXT:    movl %esp, %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-AVX-NEXT:    andl $-16, %esp
 ; X86-AVX-NEXT:    subl $48, %esp
 ; X86-AVX-NEXT:    vmovups 8(%ebp), %xmm0
@@ -1389,16 +1241,12 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
 ; X86-AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
 ; X86-AVX-NEXT:    movl %ebp, %esp
 ; X86-AVX-NEXT:    popl %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-AVX-i32-LABEL: lrint_v2fp128:
 ; X64-AVX-i32:       # %bb.0:
 ; X64-AVX-i32-NEXT:    pushq %rbx
-; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 16
 ; X64-AVX-i32-NEXT:    subq $16, %rsp
-; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 32
-; X64-AVX-i32-NEXT:    .cfi_offset %rbx, -16
 ; X64-AVX-i32-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
 ; X64-AVX-i32-NEXT:    vmovaps %xmm1, %xmm0
 ; X64-AVX-i32-NEXT:    callq lrintl@PLT
@@ -1408,15 +1256,12 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
 ; X64-AVX-i32-NEXT:    vmovd %eax, %xmm0
 ; X64-AVX-i32-NEXT:    vpinsrd $1, %ebx, %xmm0, %xmm0
 ; X64-AVX-i32-NEXT:    addq $16, %rsp
-; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 16
 ; X64-AVX-i32-NEXT:    popq %rbx
-; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 8
 ; X64-AVX-i32-NEXT:    retq
 ;
 ; X64-AVX-i64-LABEL: lrint_v2fp128:
 ; X64-AVX-i64:       # %bb.0:
 ; X64-AVX-i64-NEXT:    subq $40, %rsp
-; X64-AVX-i64-NEXT:    .cfi_def_cfa_offset 48
 ; X64-AVX-i64-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; X64-AVX-i64-NEXT:    vmovaps %xmm1, %xmm0
 ; X64-AVX-i64-NEXT:    callq lrintl@PLT
@@ -1428,29 +1273,22 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
 ; X64-AVX-i64-NEXT:    vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
 ; X64-AVX-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
 ; X64-AVX-i64-NEXT:    addq $40, %rsp
-; X64-AVX-i64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-AVX-i64-NEXT:    retq
   %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x)
   ret <2 x iXLen> %a
 }
 declare <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128>)
 
-define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
+define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) nounwind {
 ; X86-I32-LABEL: lrint_v4fp128:
 ; X86-I32:       # %bb.0:
 ; X86-I32-NEXT:    pushl %ebp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 8
-; X86-I32-NEXT:    .cfi_offset %ebp, -8
 ; X86-I32-NEXT:    movl %esp, %ebp
-; X86-I32-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I32-NEXT:    pushl %ebx
 ; X86-I32-NEXT:    pushl %edi
 ; X86-I32-NEXT:    pushl %esi
 ; X86-I32-NEXT:    andl $-16, %esp
 ; X86-I32-NEXT:    subl $16, %esp
-; X86-I32-NEXT:    .cfi_offset %esi, -20
-; X86-I32-NEXT:    .cfi_offset %edi, -16
-; X86-I32-NEXT:    .cfi_offset %ebx, -12
 ; X86-I32-NEXT:    movl 8(%ebp), %esi
 ; X86-I32-NEXT:    movl 36(%ebp), %ebx
 ; X86-I32-NEXT:    movl 40(%ebp), %edi
@@ -1492,24 +1330,17 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
 ; X86-I32-NEXT:    popl %edi
 ; X86-I32-NEXT:    popl %ebx
 ; X86-I32-NEXT:    popl %ebp
-; X86-I32-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I32-NEXT:    retl $4
 ;
 ; X86-I64-LABEL: lrint_v4fp128:
 ; X86-I64:       # %bb.0:
 ; X86-I64-NEXT:    pushl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_offset 8
-; X86-I64-NEXT:    .cfi_offset %ebp, -8
 ; X86-I64-NEXT:    movl %esp, %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I64-NEXT:    pushl %ebx
 ; X86-I64-NEXT:    pushl %edi
 ; X86-I64-NEXT:    pushl %esi
 ; X86-I64-NEXT:    andl $-16, %esp
 ; X86-I64-NEXT:    subl $32, %esp
-; X86-I64-NEXT:    .cfi_offset %esi, -20
-; X86-I64-NEXT:    .cfi_offset %edi, -16
-; X86-I64-NEXT:    .cfi_offset %ebx, -12
 ; X86-I64-NEXT:    movl 8(%ebp), %esi
 ; X86-I64-NEXT:    movl 36(%ebp), %edi
 ; X86-I64-NEXT:    movl 40(%ebp), %ebx
@@ -1561,24 +1392,17 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
 ; X86-I64-NEXT:    popl %edi
 ; X86-I64-NEXT:    popl %ebx
 ; X86-I64-NEXT:    popl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I64-NEXT:    retl $4
 ;
 ; X86-SSE2-LABEL: lrint_v4fp128:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    pushl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
 ; X86-SSE2-NEXT:    movl %esp, %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-SSE2-NEXT:    pushl %ebx
 ; X86-SSE2-NEXT:    pushl %edi
 ; X86-SSE2-NEXT:    pushl %esi
 ; X86-SSE2-NEXT:    andl $-16, %esp
 ; X86-SSE2-NEXT:    subl $48, %esp
-; X86-SSE2-NEXT:    .cfi_offset %esi, -20
-; X86-SSE2-NEXT:    .cfi_offset %edi, -16
-; X86-SSE2-NEXT:    .cfi_offset %ebx, -12
 ; X86-SSE2-NEXT:    movl 48(%ebp), %edi
 ; X86-SSE2-NEXT:    movl 52(%ebp), %ebx
 ; X86-SSE2-NEXT:    pushl 36(%ebp)
@@ -1623,22 +1447,16 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
 ; X86-SSE2-NEXT:    popl %edi
 ; X86-SSE2-NEXT:    popl %ebx
 ; X86-SSE2-NEXT:    popl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-AVX-LABEL: lrint_v4fp128:
 ; X86-AVX:       # %bb.0:
 ; X86-AVX-NEXT:    pushl %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX-NEXT:    .cfi_offset %ebp, -8
 ; X86-AVX-NEXT:    movl %esp, %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-AVX-NEXT:    pushl %edi
 ; X86-AVX-NEXT:    pushl %esi
 ; X86-AVX-NEXT:    andl $-16, %esp
 ; X86-AVX-NEXT:    subl $32, %esp
-; X86-AVX-NEXT:    .cfi_offset %esi, -16
-; X86-AVX-NEXT:    .cfi_offset %edi, -12
 ; X86-AVX-NEXT:    vmovups 40(%ebp), %xmm0
 ; X86-AVX-NEXT:    vmovups %xmm0, (%esp)
 ; X86-AVX-NEXT:    calll lrintl
@@ -1663,16 +1481,12 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
 ; X86-AVX-NEXT:    popl %esi
 ; X86-AVX-NEXT:    popl %edi
 ; X86-AVX-NEXT:    popl %ebp
-; X86-AVX-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-AVX-NEXT:    retl
 ;
 ; X64-AVX-i32-LABEL: lrint_v4fp128:
 ; X64-AVX-i32:       # %bb.0:
 ; X64-AVX-i32-NEXT:    pushq %rbx
-; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 16
 ; X64-AVX-i32-NEXT:    subq $48, %rsp
-; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 64
-; X64-AVX-i32-NEXT:    .cfi_offset %rbx, -16
 ; X64-AVX-i32-NEXT:    vmovaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; X64-AVX-i32-NEXT:    vmovaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; X64-AVX-i32-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
@@ -1694,15 +1508,12 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
 ; X64-AVX-i32-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
 ; X64-AVX-i32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
 ; X64-AVX-i32-NEXT:    addq $48, %rsp
-; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 16
 ; X64-AVX-i32-NEXT:    popq %rbx
-; X64-AVX-i32-NEXT:    .cfi_def_cfa_offset 8
 ; X64-AVX-i32-NEXT:    retq
 ;
 ; X64-AVX1-i64-LABEL: lrint_v4fp128:
 ; X64-AVX1-i64:       # %bb.0:
 ; X64-AVX1-i64-NEXT:    subq $72, %rsp
-; X64-AVX1-i64-NEXT:    .cfi_def_cfa_offset 80
 ; X64-AVX1-i64-NEXT:    vmovaps %xmm2, (%rsp) # 16-byte Spill
 ; X64-AVX1-i64-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; X64-AVX1-i64-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1727,13 +1538,11 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
 ; X64-AVX1-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
 ; X64-AVX1-i64-NEXT:    vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
 ; X64-AVX1-i64-NEXT:    addq $72, %rsp
-; X64-AVX1-i64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-AVX1-i64-NEXT:    retq
 ;
 ; AVX512-i64-LABEL: lrint_v4fp128:
 ; AVX512-i64:       # %bb.0:
 ; AVX512-i64-NEXT:    subq $72, %rsp
-; AVX512-i64-NEXT:    .cfi_def_cfa_offset 80
 ; AVX512-i64-NEXT:    vmovaps %xmm2, (%rsp) # 16-byte Spill
 ; AVX512-i64-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX512-i64-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1758,13 +1567,11 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
 ; AVX512-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
 ; AVX512-i64-NEXT:    vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
 ; AVX512-i64-NEXT:    addq $72, %rsp
-; AVX512-i64-NEXT:    .cfi_def_cfa_offset 8
 ; AVX512-i64-NEXT:    retq
 ;
 ; AVX512DQ-i64-LABEL: lrint_v4fp128:
 ; AVX512DQ-i64:       # %bb.0:
 ; AVX512DQ-i64-NEXT:    subq $72, %rsp
-; AVX512DQ-i64-NEXT:    .cfi_def_cfa_offset 80
 ; AVX512DQ-i64-NEXT:    vmovaps %xmm2, (%rsp) # 16-byte Spill
 ; AVX512DQ-i64-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX512DQ-i64-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -1789,29 +1596,22 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
 ; AVX512DQ-i64-NEXT:    # xmm0 = xmm0[0],mem[0]
 ; AVX512DQ-i64-NEXT:    vinserti128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
 ; AVX512DQ-i64-NEXT:    addq $72, %rsp
-; AVX512DQ-i64-NEXT:    .cfi_def_cfa_offset 8
 ; AVX512DQ-i64-NEXT:    retq
   %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x)
   ret <4 x iXLen> %a
 }
 declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>)
 
-define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
+define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) nounwind {
 ; X86-I32-LABEL: lrint_v8fp128:
 ; X86-I32:       # %bb.0:
 ; X86-I32-NEXT:    pushl %ebp
-; X86-I32-NEXT:    .cfi_def_cfa_offset 8
-; X86-I32-NEXT:    .cfi_offset %ebp, -8
 ; X86-I32-NEXT:    movl %esp, %ebp
-; X86-I32-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I32-NEXT:    pushl %ebx
 ; X86-I32-NEXT:    pushl %edi
 ; X86-I32-NEXT:    pushl %esi
 ; X86-I32-NEXT:    andl $-16, %esp
 ; X86-I32-NEXT:    subl $32, %esp
-; X86-I32-NEXT:    .cfi_offset %esi, -20
-; X86-I32-NEXT:    .cfi_offset %edi, -16
-; X86-I32-NEXT:    .cfi_offset %ebx, -12
 ; X86-I32-NEXT:    movl 8(%ebp), %esi
 ; X86-I32-NEXT:    movl 36(%ebp), %ebx
 ; X86-I32-NEXT:    movl 40(%ebp), %edi
@@ -1889,24 +1689,17 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
 ; X86-I32-NEXT:    popl %edi
 ; X86-I32-NEXT:    popl %ebx
 ; X86-I32-NEXT:    popl %ebp
-; X86-I32-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I32-NEXT:    retl $4
 ;
 ; X86-I64-LABEL: lrint_v8fp128:
 ; X86-I64:       # %bb.0:
 ; X86-I64-NEXT:    pushl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_offset 8
-; X86-I64-NEXT:    .cfi_offset %ebp, -8
 ; X86-I64-NEXT:    movl %esp, %ebp
-; X86-I64-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-I64-NEXT:    pushl %ebx
 ; X86-I64-NEXT:    pushl %edi
 ; X86-I64-NEXT:    pushl %esi
 ; X86-I64-NEXT:    andl $-16, %esp
 ; X86-I64-NEXT:    subl $64, %esp
-; X86-I64-NEXT:    .cfi_offset %esi, -20
-; X86-I64-NEXT:    .cfi_offset %edi, -16
-; X86-I64-NEXT:    .cfi_offset %ebx, -12
 ; X86-I64-NEXT:    movl 8(%ebp), %esi
 ; X86-I64-NEXT:    movl 36(%ebp), %edi
 ; X86-I64-NEXT:    movl 40(%ebp), %ebx
@@ -2006,24 +1799,17 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
 ; X86-I64-NEXT:    popl %edi
 ; X86-I64-NEXT:    popl %ebx
 ; X86-I64-NEXT:    popl %ebp
-; X86-I64-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-I64-NEXT:    retl $4
 ;
 ; X86-SSE2-LABEL: lrint_v8fp128:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    pushl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
 ; X86-SSE2-NEXT:    movl %esp, %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-SSE2-NEXT:    pushl %ebx
 ; X86-SSE2-NEXT:    pushl %edi
 ; X86-SSE2-NEXT:    pushl %esi
 ; X86-SSE2-NEXT:    andl $-16, %esp
 ; X86-SSE2-NEXT:    subl $64, %esp
-; X86-SSE2-NEXT:    .cfi_offset %esi, -20
-; X86-SSE2-NEXT:    .cfi_offset %edi, -16
-; X86-SSE2-NEXT:    .cfi_offset %ebx, -12
 ; X86-SSE2-NEXT:    movl 108(%ebp), %esi
 ; X86-SSE2-NEXT:    movl 112(%ebp), %edi
 ; X86-SSE2-NEXT:    movl 116(%ebp), %ebx
@@ -2109,24 +1895,17 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
 ; X86-SSE2-NEXT:    popl %edi
 ; X86-SSE2-NEXT:    popl %ebx
 ; X86-SSE2-NEXT:    popl %ebp
-; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-SSE2-NEXT:    retl
 ;
 ; X86-AVX1-LABEL: lrint_v8fp128:
 ; X86-AVX1:       # %bb.0:
 ; X86-AVX1-NEXT:    pushl %ebp
-; X86-AVX1-NEXT:    .cfi_def_cfa_offset 8
-; X86-AVX1-NEXT:    .cfi_offset %ebp, -8
 ; X86-AVX1-NEXT:    movl %esp, %ebp
-; X86-AVX1-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-AVX1-NEXT:    pushl %ebx
 ; X86-AVX1-NEXT:    pushl %edi
 ; X86-AVX1-NEXT:    pushl %esi
 ; X86-AVX1-NEXT:    andl $-16, %esp
 ; X86-AVX1-NEXT:    subl $80, %esp
-; X86-AVX1-NEXT:    .cfi_offset %esi, -20
-; X86-AVX1-NEXT:    .cfi_offset %edi, -16
-; X86-AVX1-NEXT:    .cfi_offset %ebx, -12
 ; X86-AVX1-NEXT:    vmovups 40(%ebp), %xmm0
 ; X86-AVX1-NEXT:    vmovups %xmm0, (%esp)
 ; X86-AVX1-NEXT:    calll lrintl
@@ -2175,16 +1954,12 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
 ; X86-AVX1-NEXT:    popl %edi
 ; X86-AVX1-NEXT:    popl %ebx
 ; X86-AVX1-NEXT:    popl %ebp
-; X86-AVX1-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-AVX1-NEXT:    retl
 ;
 ; X64-AVX1-i32-LABEL: lrint_v8fp128:
 ; X64-AVX1-i32:       # %bb.0:
 ; X64-AVX1-i32-NEXT:    pushq %rbx
-; X64-AVX1-i32-NEXT:    .cfi_def_cfa_offset 16
 ; X64-AVX1-i32-NEXT:    subq $112, %rsp
-; X64-AVX1-i32-NEXT:    .cfi_def_cfa_offset 128
-; X64-AVX1-i32-NEXT:    .cfi_offset %rbx, -16
 ; X64-AVX1-i32-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; X64-AVX1-i32-NEXT:    vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; X64-AVX1-i32-NEXT:    vmovaps %xmm4, (%rsp) # 16-byte Spill
@@ -2229,15 +2004,12 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
 ; X64-AVX1-i32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
 ; X64-AVX1-i32-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
 ; X64-AVX1-i32-NEXT:    addq $112, %rsp
-; X64-AVX1-i32-NEXT:    .cfi_def_cfa_offset 16
 ; X64-AVX1-i32-NEXT:    popq %rbx
-; X64-AVX1-i32-NEXT:    .cfi_def_cfa_offset 8
 ; X64-AVX1-i32-NEXT:    retq
 ;
 ; X64-AVX1-i64-LABEL: lrint_v8fp128:
 ; X64-AVX1-i64:       # %bb.0:
 ; X64-AVX1-i64-NEXT:    subq $152, %rsp
-; X64-AVX1-i64-NEXT:    .cfi_def_cfa_offset 160
 ; X64-AVX1-i64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; X64-AVX1-i64-NEXT:    vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; X64-AVX1-i64-NEXT:    vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -2289,13 +2061,11 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
 ; X64-AVX1-i64-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm1 # 16-byte Folded Reload
 ; X64-AVX1-i64-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
 ; X64-AVX1-i64-NEXT:    addq $152, %rsp
-; X64-AVX1-i64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-AVX1-i64-NEXT:    retq
 ;
 ; AVX512-i64-LABEL: lrint_v8fp128:
 ; AVX512-i64:       # %bb.0:
 ; AVX512-i64-NEXT:    subq $152, %rsp
-; AVX512-i64-NEXT:    .cfi_def_cfa_offset 160
 ; AVX512-i64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
 ; AVX512-i64-NEXT:    vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX512-i64-NEXT:    vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -2347,13 +2117,11 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
 ; AVX512-i64-NEXT:    vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
 ; AVX512-i64-NEXT:    vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
 ; AVX512-i64-NEXT:    addq $152, %rsp
-; AVX512-i64-NEXT:    .cfi_def_cfa_offset 8
 ; AVX512-i64-NEXT:    retq
 ;
 ; AVX512DQ-i64-LABEL: lrint_v8fp128:
 ; AVX512DQ-i64:       # %bb.0:
 ; AVX512DQ-i64-NEXT:    subq $152, %rsp
-; AVX512DQ-i64-NEXT:    .cfi_def_cfa_offset 160
 ; AVX512DQ-i64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
 ; AVX512DQ-i64-NEXT:    vmovaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; AVX512DQ-i64-NEXT:    vmovaps %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -2405,7 +2173,6 @@ define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
 ; AVX512DQ-i64-NEXT:    vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
 ; AVX512DQ-i64-NEXT:    vinserti64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
 ; AVX512DQ-i64-NEXT:    addq $152, %rsp
-; AVX512DQ-i64-NEXT:    .cfi_def_cfa_offset 8
 ; AVX512DQ-i64-NEXT:    retq
   %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x)
   ret <8 x iXLen> %a

From 5255ea8fbd982e5f7ec237ee8d2ffa0454331e27 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 9 Aug 2025 23:05:31 -0500
Subject: [PATCH 6/7] specify the type for intrinsic calls

---
 llvm/test/CodeGen/ARM/llrint-conv.ll     | 12 +++++------
 llvm/test/CodeGen/AVR/llrint.ll          | 12 +++++------
 llvm/test/CodeGen/Mips/llrint-conv.ll    | 22 ++++++++++----------
 llvm/test/CodeGen/PowerPC/llrint-conv.ll | 26 ++++++++++++------------
 llvm/test/CodeGen/X86/llrint-conv.ll     | 16 +++++++--------
 5 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/llvm/test/CodeGen/ARM/llrint-conv.ll b/llvm/test/CodeGen/ARM/llrint-conv.ll
index f0fb2e7543be6..749ee00a3c68e 100644
--- a/llvm/test/CodeGen/ARM/llrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/llrint-conv.ll
@@ -7,7 +7,7 @@
 ; HARDFP:       bl      llrintf
 define i64 @testmsxh_builtin(half %x) {
 entry:
-  %0 = tail call i64 @llvm.llrint.f16(half %x)
+  %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
   ret i64 %0
 }
 
@@ -17,7 +17,7 @@ entry:
 ; HARDFP:       bl      llrintf
 define i64 @testmsxs_builtin(float %x) {
 entry:
-  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  %0 = tail call i64 @llvm.llrint.i64.f32(float %x)
   ret i64 %0
 }
 
@@ -27,7 +27,7 @@ entry:
 ; HARDFP:       bl      llrint
 define i64 @testmsxd_builtin(double %x) {
 entry:
-  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  %0 = tail call i64 @llvm.llrint.i64.f64(double %x)
   ret i64 %0
 }
 
@@ -38,9 +38,9 @@ entry:
 ; HARDFP:       bl      llrintl
 define i64 @testmsxq_builtin(fp128 %x) {
 entry:
-  %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+  %0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x)
   ret i64 %0
 }
 
-declare i64 @llvm.llrint.f32(float) nounwind readnone
-declare i64 @llvm.llrint.f64(double) nounwind readnone
+declare i64 @llvm.llrint.i64.f32(float) nounwind readnone
+declare i64 @llvm.llrint.i64.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/AVR/llrint.ll b/llvm/test/CodeGen/AVR/llrint.ll
index c55664f2d7353..2980879bb6e3e 100644
--- a/llvm/test/CodeGen/AVR/llrint.ll
+++ b/llvm/test/CodeGen/AVR/llrint.ll
@@ -4,7 +4,7 @@
 ; FIXME: crash "Input type needs to be promoted!"
 ; define i64 @testmsxh_builtin(half %x) {
 ; entry:
-;   %0 = tail call i64 @llvm.llrint.f16(half %x)
+;   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
 ;   ret i64 %0
 ; }
 
@@ -14,7 +14,7 @@ define i64 @testmsxs_builtin(float %x) {
 ; CHECK-NEXT:    call llrintf
 ; CHECK-NEXT:    ret
 entry:
-  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  %0 = tail call i64 @llvm.llrint.i64.f32(float %x)
   ret i64 %0
 }
 
@@ -24,7 +24,7 @@ define i64 @testmsxd_builtin(double %x) {
 ; CHECK-NEXT:    call llrint
 ; CHECK-NEXT:    ret
 entry:
-  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  %0 = tail call i64 @llvm.llrint.i64.f64(double %x)
   ret i64 %0
 }
 
@@ -35,9 +35,9 @@ define i64 @testmsxq_builtin(fp128 %x) {
 ; CHECK-NEXT:    call llrintl
 ; CHECK-NEXT:    ret
 entry:
-  %0 = tail call i64 @llvm.llrint.fp128(fp128 %x)
+  %0 = tail call i64 @llvm.llrint.i64.fp128(fp128 %x)
   ret i64 %0
 }
 
-declare i64 @llvm.llrint.f32(float) nounwind readnone
-declare i64 @llvm.llrint.f64(double) nounwind readnone
+declare i64 @llvm.llrint.i64.f32(float) nounwind readnone
+declare i64 @llvm.llrint.i64.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/Mips/llrint-conv.ll b/llvm/test/CodeGen/Mips/llrint-conv.ll
index ee3c0d99253a6..592d40c0f65aa 100644
--- a/llvm/test/CodeGen/Mips/llrint-conv.ll
+++ b/llvm/test/CodeGen/Mips/llrint-conv.ll
@@ -4,14 +4,14 @@
 ; FIXME: crash
 ; define signext i32 @testmswh(half %x) {
 ; entry:
-;   %0 = tail call i64 @llvm.llrint.f16(half %x)
+;   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
 ;   %conv = trunc i64 %0 to i32
 ;   ret i32 %conv
 ; }
 
 ; define i64 @testmsxh(half %x) {
 ; entry:
-;   %0 = tail call i64 @llvm.llrint.f16(half %x)
+;   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
 ;   ret i64 %0
 ; }
 
@@ -19,7 +19,7 @@ define signext i32 @testmsws(float %x) {
 ; CHECK-LABEL: testmsws:
 ; CHECK:       jal     llrintf
 entry:
-  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  %0 = tail call i64 @llvm.llrint.i64.f32(float %x)
   %conv = trunc i64 %0 to i32
   ret i32 %conv
 }
@@ -28,7 +28,7 @@ define i64 @testmsxs(float %x) {
 ; CHECK-LABEL: testmsxs:
 ; CHECK:       jal     llrintf
 entry:
-  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  %0 = tail call i64 @llvm.llrint.i64.f32(float %x)
   ret i64 %0
 }
 
@@ -36,7 +36,7 @@ define signext i32 @testmswd(double %x) {
 ; CHECK-LABEL: testmswd:
 ; CHECK:       jal     llrint
 entry:
-  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  %0 = tail call i64 @llvm.llrint.i64.f64(double %x)
   %conv = trunc i64 %0 to i32
   ret i32 %conv
 }
@@ -45,7 +45,7 @@ define i64 @testmsxd(double %x) {
 ; CHECK-LABEL: testmsxd:
 ; CHECK:       jal     llrint
 entry:
-  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  %0 = tail call i64 @llvm.llrint.i64.f64(double %x)
   ret i64 %0
 }
 
@@ -53,7 +53,7 @@ define signext i32 @testmswl(fp128 %x) {
 ; CHECK-LABEL: testmswl:
 ; CHECK:       jal     llrintl
 entry:
-  %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+  %0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x)
   %conv = trunc i64 %0 to i32
   ret i32 %conv
 }
@@ -62,10 +62,10 @@ define i64 @testmsll(fp128 %x) {
 ; CHECK-LABEL: testmsll:
 ; CHECK:       jal     llrintl
 entry:
-  %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+  %0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x)
   ret i64 %0
 }
 
-declare i64 @llvm.llrint.f32(float) nounwind readnone
-declare i64 @llvm.llrint.f64(double) nounwind readnone
-declare i64 @llvm.llrint.f128(fp128) nounwind readnone
+declare i64 @llvm.llrint.i64.f32(float) nounwind readnone
+declare i64 @llvm.llrint.i64.f64(double) nounwind readnone
+declare i64 @llvm.llrint.i64.f128(fp128) nounwind readnone
diff --git a/llvm/test/CodeGen/PowerPC/llrint-conv.ll b/llvm/test/CodeGen/PowerPC/llrint-conv.ll
index dcd3bd25a83c5..8e49ddcc6355f 100644
--- a/llvm/test/CodeGen/PowerPC/llrint-conv.ll
+++ b/llvm/test/CodeGen/PowerPC/llrint-conv.ll
@@ -4,14 +4,14 @@
 ; FIXME: crash "Input type needs to be promoted!"
 ; define signext i32 @testmswh(half %x) {
 ; entry:
-;   %0 = tail call i64 @llvm.llrint.f16(half %x)
+;   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
 ;   %conv = trunc i64 %0 to i32
 ;   ret i32 %conv
 ; }
 
 ; define i64 @testmsxh(half %x) {
 ; entry:
-;   %0 = tail call i64 @llvm.llrint.f16(half %x)
+;   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
 ;   ret i64 %0
 ; }
 
@@ -19,7 +19,7 @@
 ; CHECK:       bl      llrintf
 define signext i32 @testmsws(float %x) {
 entry:
-  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  %0 = tail call i64 @llvm.llrint.i64.f32(float %x)
   %conv = trunc i64 %0 to i32
   ret i32 %conv
 }
@@ -28,7 +28,7 @@ entry:
 ; CHECK:       bl      llrintf
 define i64 @testmsxs(float %x) {
 entry:
-  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  %0 = tail call i64 @llvm.llrint.i64.f32(float %x)
   ret i64 %0
 }
 
@@ -36,7 +36,7 @@ entry:
 ; CHECK:       bl      llrint
 define signext i32 @testmswd(double %x) {
 entry:
-  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  %0 = tail call i64 @llvm.llrint.i64.f64(double %x)
   %conv = trunc i64 %0 to i32
   ret i32 %conv
 }
@@ -45,7 +45,7 @@ entry:
 ; CHECK:       bl      llrint
 define i64 @testmsxd(double %x) {
 entry:
-  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  %0 = tail call i64 @llvm.llrint.i64.f64(double %x)
   ret i64 %0
 }
 
@@ -53,7 +53,7 @@ entry:
 ; CHECK:       bl      llrintl
 define signext i32 @testmswl(ppc_fp128 %x) {
 entry:
-  %0 = tail call i64 @llvm.llrint.ppcf128(ppc_fp128 %x)
+  %0 = tail call i64 @llvm.llrint.i64.ppcf128(ppc_fp128 %x)
   %conv = trunc i64 %0 to i32
   ret i32 %conv
 }
@@ -62,7 +62,7 @@ entry:
 ; CHECK:       bl      llrintl
 define i64 @testmsll(ppc_fp128 %x) {
 entry:
-  %0 = tail call i64 @llvm.llrint.ppcf128(ppc_fp128 %x)
+  %0 = tail call i64 @llvm.llrint.i64.ppcf128(ppc_fp128 %x)
   ret i64 %0
 }
 
@@ -70,7 +70,7 @@ entry:
 ; CHECK:       bl      llrintf128
 define signext i32 @testmswq(fp128 %x) {
 entry:
-  %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+  %0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x)
   %conv = trunc i64 %0 to i32
   ret i32 %conv
 }
@@ -79,10 +79,10 @@ entry:
 ; CHECK:       bl      llrintf128
 define i64 @testmslq(fp128 %x) {
 entry:
-  %0 = tail call i64 @llvm.llrint.f128(fp128 %x)
+  %0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x)
   ret i64 %0
 }
 
-declare i64 @llvm.llrint.f32(float) nounwind readnone
-declare i64 @llvm.llrint.f64(double) nounwind readnone
-declare i64 @llvm.llrint.ppcf128(ppc_fp128) nounwind readnone
+declare i64 @llvm.llrint.i64.f32(float) nounwind readnone
+declare i64 @llvm.llrint.i64.f64(double) nounwind readnone
+declare i64 @llvm.llrint.i64.ppcf128(ppc_fp128) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/llrint-conv.ll b/llvm/test/CodeGen/X86/llrint-conv.ll
index d3eca5197a94b..7bcf573118538 100644
--- a/llvm/test/CodeGen/X86/llrint-conv.ll
+++ b/llvm/test/CodeGen/X86/llrint-conv.ll
@@ -42,7 +42,7 @@ define i64 @testmsxh(half %x) nounwind {
 ; X64-SSE-NEXT:    popq %rcx
 ; X64-SSE-NEXT:    retq
 entry:
-  %0 = tail call i64 @llvm.llrint.f16(half %x)
+  %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
   ret i64 %0
 }
 
@@ -103,7 +103,7 @@ define i64 @testmsxs(float %x) nounwind {
 ; X64-AVX-NEXT:    vcvtss2si %xmm0, %rax
 ; X64-AVX-NEXT:    retq
 entry:
-  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  %0 = tail call i64 @llvm.llrint.i64.f32(float %x)
   ret i64 %0
 }
 
@@ -164,7 +164,7 @@ define i64 @testmsxd(double %x) nounwind {
 ; X64-AVX-NEXT:    vcvtsd2si %xmm0, %rax
 ; X64-AVX-NEXT:    retq
 entry:
-  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  %0 = tail call i64 @llvm.llrint.i64.f64(double %x)
   ret i64 %0
 }
 
@@ -190,7 +190,7 @@ define i64 @testmsll(x86_fp80 %x) nounwind {
 ; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
 ; X64-NEXT:    retq
 entry:
-  %0 = tail call i64 @llvm.llrint.f80(x86_fp80 %x)
+  %0 = tail call i64 @llvm.llrint.i64.f80(x86_fp80 %x)
   ret i64 %0
 }
 
@@ -245,10 +245,10 @@ define i64 @testmslq(fp128 %x) nounwind {
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    jmp llrintl@PLT # TAILCALL
 entry:
-  %0 = tail call i64 @llvm.llrint.fp128(fp128 %x)
+  %0 = tail call i64 @llvm.llrint.i64.fp128(fp128 %x)
   ret i64 %0
 }
 
-declare i64 @llvm.llrint.f32(float) nounwind readnone
-declare i64 @llvm.llrint.f64(double) nounwind readnone
-declare i64 @llvm.llrint.f80(x86_fp80) nounwind readnone
+declare i64 @llvm.llrint.i64.f32(float) nounwind readnone
+declare i64 @llvm.llrint.i64.f64(double) nounwind readnone
+declare i64 @llvm.llrint.i64.f80(x86_fp80) nounwind readnone

From 81edb7c48e5855c4ff9ce6df50a2173228c5c1a1 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sat, 9 Aug 2025 23:22:36 -0500
Subject: [PATCH 7/7] get rid of excessively large <32 x *> and <16 x fp128>
 tests on arm

---
 llvm/test/CodeGen/ARM/vector-llrint.ll | 10652 ++-----------------
 llvm/test/CodeGen/ARM/vector-lrint.ll  | 12382 +++--------------------
 2 files changed, 2158 insertions(+), 20876 deletions(-)

diff --git a/llvm/test/CodeGen/ARM/vector-llrint.ll b/llvm/test/CodeGen/ARM/vector-llrint.ll
index 870947fac063e..5f4e39125da12 100644
--- a/llvm/test/CodeGen/ARM/vector-llrint.ll
+++ b/llvm/test/CodeGen/ARM/vector-llrint.ll
@@ -1,13 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf                | FileCheck %s --check-prefix=LE
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf                | FileCheck %s --check-prefix=LE
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon   | FileCheck %s --check-prefix=LE-NEON
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon   | FileCheck %s --check-prefix=LE-NEON
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf              | FileCheck %s --check-prefix=BE
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf              | FileCheck %s --check-prefix=BE
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-NEON
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-NEON
+; RUN: llc %s -o - -mtriple=armv7-unknown-none-eabihf                | FileCheck %s --check-prefixes=LE
+; RUN: llc %s -o - -mtriple=armv7-unknown-none-eabihf -mattr=+neon   | FileCheck %s --check-prefixes=LE
+; RUN: llc %s -o - -mtriple=armebv7-unknown-none-eabihf              | FileCheck %s --check-prefixes=BE
+; RUN: llc %s -o - -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE
 
 define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
 ; LE-LABEL: llrint_v1i64_v1f16:
@@ -23,19 +19,6 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
 ; LE-NEXT:    vmov.32 d0[1], r1
 ; LE-NEXT:    pop {r11, pc}
 ;
-; LE-NEON-LABEL: llrint_v1i64_v1f16:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r11, lr}
-; LE-NEON-NEXT:    push {r11, lr}
-; LE-NEON-NEXT:    vmov r0, s0
-; LE-NEON-NEXT:    bl __aeabi_f2h
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d0[0], r0
-; LE-NEON-NEXT:    vmov.32 d0[1], r1
-; LE-NEON-NEXT:    pop {r11, pc}
-;
 ; BE-LABEL: llrint_v1i64_v1f16:
 ; BE:       @ %bb.0:
 ; BE-NEXT:    .save {r11, lr}
@@ -49,20 +32,6 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) {
 ; BE-NEXT:    vmov.32 d16[1], r1
 ; BE-NEXT:    vrev64.32 d0, d16
 ; BE-NEXT:    pop {r11, pc}
-;
-; BE-NEON-LABEL: llrint_v1i64_v1f16:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r11, lr}
-; BE-NEON-NEXT:    push {r11, lr}
-; BE-NEON-NEXT:    vmov r0, s0
-; BE-NEON-NEXT:    bl __aeabi_f2h
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-NEON-NEXT:    vrev64.32 d0, d16
-; BE-NEON-NEXT:    pop {r11, pc}
   %a = call <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half> %x)
   ret <1 x i64> %a
 }
@@ -94,31 +63,6 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
 ; LE-NEXT:    vpop {d8, d9}
 ; LE-NEXT:    pop {r4, r5, r11, pc}
 ;
-; LE-NEON-LABEL: llrint_v1i64_v2f16:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r11, lr}
-; LE-NEON-NEXT:    push {r4, r5, r11, lr}
-; LE-NEON-NEXT:    .vsave {d8, d9}
-; LE-NEON-NEXT:    vpush {d8, d9}
-; LE-NEON-NEXT:    vmov r0, s1
-; LE-NEON-NEXT:    vmov.f32 s16, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    mov r4, r0
-; LE-NEON-NEXT:    vmov r0, s16
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    vmov.32 d9[0], r4
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    vmov.32 d9[1], r5
-; LE-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-NEON-NEXT:    vorr q0, q4, q4
-; LE-NEON-NEXT:    vpop {d8, d9}
-; LE-NEON-NEXT:    pop {r4, r5, r11, pc}
-;
 ; BE-LABEL: llrint_v1i64_v2f16:
 ; BE:       @ %bb.0:
 ; BE-NEXT:    .save {r4, r5, r11, lr}
@@ -144,32 +88,6 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
 ; BE-NEXT:    vrev64.32 d0, d16
 ; BE-NEXT:    vpop {d8}
 ; BE-NEXT:    pop {r4, r5, r11, pc}
-;
-; BE-NEON-LABEL: llrint_v1i64_v2f16:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r11, lr}
-; BE-NEON-NEXT:    push {r4, r5, r11, lr}
-; BE-NEON-NEXT:    .vsave {d8}
-; BE-NEON-NEXT:    vpush {d8}
-; BE-NEON-NEXT:    vmov r0, s1
-; BE-NEON-NEXT:    vmov.f32 s16, s0
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    mov r4, r0
-; BE-NEON-NEXT:    vmov r0, s16
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    vmov.32 d8[0], r4
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov.32 d8[1], r5
-; BE-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-NEON-NEXT:    vrev64.32 d1, d8
-; BE-NEON-NEXT:    vrev64.32 d0, d16
-; BE-NEON-NEXT:    vpop {d8}
-; BE-NEON-NEXT:    pop {r4, r5, r11, pc}
   %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x)
   ret <2 x i64> %a
 }
@@ -222,52 +140,6 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 ; LE-NEXT:    vpop {d12, d13}
 ; LE-NEXT:    pop {r4, r5, r6, r7, r11, pc}
 ;
-; LE-NEON-LABEL: llrint_v4i64_v4f16:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r11, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, r7, r11, lr}
-; LE-NEON-NEXT:    .vsave {d12, d13}
-; LE-NEON-NEXT:    vpush {d12, d13}
-; LE-NEON-NEXT:    .vsave {d8, d9, d10}
-; LE-NEON-NEXT:    vpush {d8, d9, d10}
-; LE-NEON-NEXT:    vmov r0, s1
-; LE-NEON-NEXT:    vmov.f32 s16, s3
-; LE-NEON-NEXT:    vmov.f32 s20, s2
-; LE-NEON-NEXT:    vmov.f32 s18, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    mov r5, r0
-; LE-NEON-NEXT:    vmov r0, s18
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    mov r7, r0
-; LE-NEON-NEXT:    vmov r0, s16
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r7
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    vmov r0, s20
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    vmov.32 d13[0], r5
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    vmov.32 d13[1], r4
-; LE-NEON-NEXT:    vmov.32 d9[1], r6
-; LE-NEON-NEXT:    vmov.32 d12[1], r7
-; LE-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-NEON-NEXT:    vorr q0, q6, q6
-; LE-NEON-NEXT:    vorr q1, q4, q4
-; LE-NEON-NEXT:    vpop {d8, d9, d10}
-; LE-NEON-NEXT:    vpop {d12, d13}
-; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r11, pc}
-;
 ; BE-LABEL: llrint_v4i64_v4f16:
 ; BE:       @ %bb.0:
 ; BE-NEXT:    .save {r4, r5, r6, r7, r11, lr}
@@ -312,51 +184,6 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 ; BE-NEXT:    vrev64.32 d2, d16
 ; BE-NEXT:    vpop {d8, d9, d10}
 ; BE-NEXT:    pop {r4, r5, r6, r7, r11, pc}
-;
-; BE-NEON-LABEL: llrint_v4i64_v4f16:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r11, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, r7, r11, lr}
-; BE-NEON-NEXT:    .vsave {d8, d9, d10}
-; BE-NEON-NEXT:    vpush {d8, d9, d10}
-; BE-NEON-NEXT:    vmov r0, s1
-; BE-NEON-NEXT:    vmov.f32 s16, s3
-; BE-NEON-NEXT:    vmov.f32 s18, s2
-; BE-NEON-NEXT:    vmov.f32 s20, s0
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    mov r5, r0
-; BE-NEON-NEXT:    vmov r0, s20
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    mov r7, r0
-; BE-NEON-NEXT:    vmov r0, s16
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov s0, r7
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    vmov r0, s18
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    vmov.32 d9[0], r5
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-NEON-NEXT:    vmov.32 d8[1], r6
-; BE-NEON-NEXT:    vmov.32 d10[1], r7
-; BE-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-NEON-NEXT:    vrev64.32 d1, d9
-; BE-NEON-NEXT:    vrev64.32 d3, d8
-; BE-NEON-NEXT:    vrev64.32 d0, d10
-; BE-NEON-NEXT:    vrev64.32 d2, d16
-; BE-NEON-NEXT:    vpop {d8, d9, d10}
-; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r11, pc}
   %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x)
   ret <4 x i64> %a
 }
@@ -452,95 +279,6 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
 ; LE-NEXT:    add sp, sp, #4
 ; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
-; LE-NEON-LABEL: llrint_v8i64_v8f16:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    .pad #4
-; LE-NEON-NEXT:    sub sp, sp, #4
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    .pad #8
-; LE-NEON-NEXT:    sub sp, sp, #8
-; LE-NEON-NEXT:    vmov r0, s1
-; LE-NEON-NEXT:    vstr s6, [sp, #4] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.f32 s16, s7
-; LE-NEON-NEXT:    vmov.f32 s18, s5
-; LE-NEON-NEXT:    vmov.f32 s20, s4
-; LE-NEON-NEXT:    vmov.f32 s22, s3
-; LE-NEON-NEXT:    vmov.f32 s24, s2
-; LE-NEON-NEXT:    vmov.f32 s26, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    mov r9, r0
-; LE-NEON-NEXT:    vmov r0, s26
-; LE-NEON-NEXT:    str r1, [sp] @ 4-byte Spill
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    mov r10, r0
-; LE-NEON-NEXT:    vmov r0, s22
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    mov r5, r0
-; LE-NEON-NEXT:    vmov r0, s24
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    mov r7, r0
-; LE-NEON-NEXT:    vmov r0, s18
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    mov r6, r0
-; LE-NEON-NEXT:    vmov r0, s20
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    mov r4, r0
-; LE-NEON-NEXT:    vmov r0, s16
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r4
-; LE-NEON-NEXT:    mov r11, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r6
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r7
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r5
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r10
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vldr s0, [sp, #4] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    vmov r0, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    vmov.32 d9[0], r9
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov.32 d15[1], r5
-; LE-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-NEON-NEXT:    vmov.32 d13[1], r6
-; LE-NEON-NEXT:    vmov.32 d11[1], r11
-; LE-NEON-NEXT:    vmov.32 d8[1], r4
-; LE-NEON-NEXT:    vmov.32 d14[1], r7
-; LE-NEON-NEXT:    vorr q0, q4, q4
-; LE-NEON-NEXT:    vmov.32 d12[1], r8
-; LE-NEON-NEXT:    vorr q1, q7, q7
-; LE-NEON-NEXT:    vmov.32 d10[1], r1
-; LE-NEON-NEXT:    vorr q2, q6, q6
-; LE-NEON-NEXT:    vorr q3, q5, q5
-; LE-NEON-NEXT:    add sp, sp, #8
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    add sp, sp, #4
-; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
 ; BE-LABEL: llrint_v8i64_v8f16:
 ; BE:       @ %bb.0:
 ; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -632,98 +370,6 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) {
 ; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
 ; BE-NEXT:    add sp, sp, #4
 ; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-NEON-LABEL: llrint_v8i64_v8f16:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    .pad #4
-; BE-NEON-NEXT:    sub sp, sp, #4
-; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
-; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
-; BE-NEON-NEXT:    .pad #8
-; BE-NEON-NEXT:    sub sp, sp, #8
-; BE-NEON-NEXT:    vmov r0, s1
-; BE-NEON-NEXT:    vmov.f32 s18, s7
-; BE-NEON-NEXT:    vmov.f32 s16, s6
-; BE-NEON-NEXT:    vmov.f32 s20, s5
-; BE-NEON-NEXT:    vmov.f32 s22, s4
-; BE-NEON-NEXT:    vmov.f32 s24, s3
-; BE-NEON-NEXT:    vmov.f32 s26, s2
-; BE-NEON-NEXT:    vmov.f32 s28, s0
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    mov r9, r0
-; BE-NEON-NEXT:    vmov r0, s28
-; BE-NEON-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    mov r10, r0
-; BE-NEON-NEXT:    vmov r0, s24
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    mov r5, r0
-; BE-NEON-NEXT:    vmov r0, s26
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    mov r7, r0
-; BE-NEON-NEXT:    vmov r0, s20
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    mov r6, r0
-; BE-NEON-NEXT:    vmov r0, s22
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    mov r4, r0
-; BE-NEON-NEXT:    vmov r0, s18
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov s0, r4
-; BE-NEON-NEXT:    mov r11, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov s0, r6
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov s0, r7
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov s0, r5
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov s0, r10
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    vmov r0, s16
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    vmov.32 d8[0], r9
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov.32 d13[1], r5
-; BE-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-NEON-NEXT:    vmov.32 d11[1], r6
-; BE-NEON-NEXT:    vmov.32 d9[1], r11
-; BE-NEON-NEXT:    vmov.32 d14[1], r4
-; BE-NEON-NEXT:    vmov.32 d12[1], r7
-; BE-NEON-NEXT:    vmov.32 d10[1], r8
-; BE-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-NEON-NEXT:    vrev64.32 d1, d8
-; BE-NEON-NEXT:    vrev64.32 d3, d13
-; BE-NEON-NEXT:    vrev64.32 d5, d11
-; BE-NEON-NEXT:    vrev64.32 d7, d9
-; BE-NEON-NEXT:    vrev64.32 d0, d14
-; BE-NEON-NEXT:    vrev64.32 d2, d12
-; BE-NEON-NEXT:    vrev64.32 d4, d10
-; BE-NEON-NEXT:    vrev64.32 d6, d16
-; BE-NEON-NEXT:    add sp, sp, #8
-; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
-; BE-NEON-NEXT:    add sp, sp, #4
-; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
   %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x)
   ret <8 x i64> %a
 }
@@ -929,205 +575,6 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; LE-NEXT:    add sp, sp, #4
 ; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
-; LE-NEON-LABEL: llrint_v16i64_v16f16:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    .pad #4
-; LE-NEON-NEXT:    sub sp, sp, #4
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    .pad #120
-; LE-NEON-NEXT:    sub sp, sp, #120
-; LE-NEON-NEXT:    mov r11, r0
-; LE-NEON-NEXT:    vmov r0, s7
-; LE-NEON-NEXT:    vstr s15, [sp, #24] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.f32 s23, s13
-; LE-NEON-NEXT:    vstr s14, [sp, #100] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.f32 s25, s12
-; LE-NEON-NEXT:    vmov.f32 s27, s11
-; LE-NEON-NEXT:    vstr s10, [sp, #104] @ 4-byte Spill
-; LE-NEON-NEXT:    vstr s9, [sp, #108] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.f32 s24, s8
-; LE-NEON-NEXT:    vmov.f32 s19, s6
-; LE-NEON-NEXT:    vmov.f32 s29, s5
-; LE-NEON-NEXT:    vmov.f32 s17, s4
-; LE-NEON-NEXT:    vmov.f32 s16, s3
-; LE-NEON-NEXT:    vmov.f32 s21, s2
-; LE-NEON-NEXT:    vmov.f32 s26, s1
-; LE-NEON-NEXT:    vmov.f32 s18, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    mov r7, r0
-; LE-NEON-NEXT:    vmov r0, s25
-; LE-NEON-NEXT:    str r1, [sp, #56] @ 4-byte Spill
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    mov r5, r0
-; LE-NEON-NEXT:    vmov r0, s27
-; LE-NEON-NEXT:    str r1, [sp, #116] @ 4-byte Spill
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    mov r6, r0
-; LE-NEON-NEXT:    vmov r0, s29
-; LE-NEON-NEXT:    str r1, [sp, #112] @ 4-byte Spill
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    vmov r0, s23
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    add lr, sp, #80
-; LE-NEON-NEXT:    vmov.32 d17[0], r6
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    mov r6, r0
-; LE-NEON-NEXT:    vmov r0, s17
-; LE-NEON-NEXT:    vmov r8, s21
-; LE-NEON-NEXT:    str r1, [sp, #76] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov r10, s19
-; LE-NEON-NEXT:    vmov.32 d10[0], r5
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    add lr, sp, #40
-; LE-NEON-NEXT:    vmov.32 d11[0], r6
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    mov r0, r10
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    vmov.32 d11[0], r7
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    mov r0, r8
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    mov r6, r0
-; LE-NEON-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-NEON-NEXT:    vmov r0, s18
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    mov r5, r0
-; LE-NEON-NEXT:    vmov r0, s16
-; LE-NEON-NEXT:    vmov.32 d10[1], r7
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    vmov.32 d15[1], r4
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    vmov r0, s26
-; LE-NEON-NEXT:    add lr, sp, #24
-; LE-NEON-NEXT:    vmov r8, s24
-; LE-NEON-NEXT:    vmov.32 d14[1], r9
-; LE-NEON-NEXT:    mov r10, r1
-; LE-NEON-NEXT:    vmov s24, r5
-; LE-NEON-NEXT:    vldr s0, [sp, #24] @ 4-byte Reload
-; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEON-NEXT:    vmov r7, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov.f32 s0, s24
-; LE-NEON-NEXT:    vmov s22, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s22
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    vmov s24, r6
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    mov r0, r7
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov.f32 s0, s24
-; LE-NEON-NEXT:    vmov s22, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s22
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d15[1], r6
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    mov r0, r8
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vldr s0, [sp, #100] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r7, r0
-; LE-NEON-NEXT:    vmov.32 d14[1], r5
-; LE-NEON-NEXT:    vmov r0, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vldr s0, [sp, #104] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov s20, r0
-; LE-NEON-NEXT:    vmov.32 d13[1], r6
-; LE-NEON-NEXT:    vmov r4, s0
-; LE-NEON-NEXT:    vldr s0, [sp, #108] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov r0, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov.f32 s0, s20
-; LE-NEON-NEXT:    vmov s16, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s16
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    vmov s18, r7
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    mov r0, r4
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov.f32 s0, s18
-; LE-NEON-NEXT:    vmov s16, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s16
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d11[1], r6
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #80
-; LE-NEON-NEXT:    vmov.32 d10[1], r4
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #40
-; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    vmov.32 d16[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
-; LE-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #24
-; LE-NEON-NEXT:    vmov.32 d19[1], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #116] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov.32 d21[1], r10
-; LE-NEON-NEXT:    vmov.32 d18[1], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov.32 d12[1], r5
-; LE-NEON-NEXT:    vmov.32 d17[1], r0
-; LE-NEON-NEXT:    add r0, r11, #64
-; LE-NEON-NEXT:    vmov.32 d16[1], r1
-; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-NEON-NEXT:    vmov.32 d20[1], r9
-; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]
-; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r11:128]!
-; LE-NEON-NEXT:    vst1.64 {d20, d21}, [r11:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]
-; LE-NEON-NEXT:    add sp, sp, #120
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    add sp, sp, #4
-; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
 ; BE-LABEL: llrint_v16i64_v16f16:
 ; BE:       @ %bb.0:
 ; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -1337,3057 +784,296 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) {
 ; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-NEXT:    add sp, sp, #4
 ; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-NEON-LABEL: llrint_v16i64_v16f16:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    .pad #4
-; BE-NEON-NEXT:    sub sp, sp, #4
-; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    .pad #112
-; BE-NEON-NEXT:    sub sp, sp, #112
-; BE-NEON-NEXT:    mov r11, r0
-; BE-NEON-NEXT:    vmov r0, s14
-; BE-NEON-NEXT:    vmov.f32 s17, s15
-; BE-NEON-NEXT:    vstr s13, [sp, #52] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.f32 s21, s12
-; BE-NEON-NEXT:    vstr s10, [sp, #68] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.f32 s23, s11
-; BE-NEON-NEXT:    vstr s7, [sp, #72] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.f32 s19, s9
-; BE-NEON-NEXT:    vstr s4, [sp, #28] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.f32 s26, s8
-; BE-NEON-NEXT:    vmov.f32 s24, s6
-; BE-NEON-NEXT:    vmov.f32 s18, s5
-; BE-NEON-NEXT:    vmov.f32 s25, s3
-; BE-NEON-NEXT:    vmov.f32 s16, s2
-; BE-NEON-NEXT:    vmov.f32 s27, s1
-; BE-NEON-NEXT:    vmov.f32 s29, s0
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    mov r8, r0
-; BE-NEON-NEXT:    vmov r0, s29
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    mov r9, r0
-; BE-NEON-NEXT:    vmov r0, s27
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    mov r7, r0
-; BE-NEON-NEXT:    vmov r0, s21
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    mov r6, r0
-; BE-NEON-NEXT:    vmov r0, s25
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    mov r5, r0
-; BE-NEON-NEXT:    vmov r0, s23
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov s0, r5
-; BE-NEON-NEXT:    str r1, [sp, #108] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr d16, [sp, #96] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov s0, r6
-; BE-NEON-NEXT:    str r1, [sp, #92] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr d16, [sp, #80] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov s0, r7
-; BE-NEON-NEXT:    str r1, [sp, #76] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr d16, [sp, #56] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov s0, r9
-; BE-NEON-NEXT:    mov r10, r1
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    vmov r0, s17
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    vmov.32 d10[0], r8
-; BE-NEON-NEXT:    vmov r6, s19
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    mov r0, r6
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    mov r6, r0
-; BE-NEON-NEXT:    vmov r0, s18
-; BE-NEON-NEXT:    vmov.32 d10[1], r4
-; BE-NEON-NEXT:    vstr d10, [sp, #40] @ 8-byte Spill
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    mov r4, r0
-; BE-NEON-NEXT:    vmov r0, s16
-; BE-NEON-NEXT:    vmov.32 d11[1], r7
-; BE-NEON-NEXT:    vstr d11, [sp, #32] @ 8-byte Spill
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov.32 d15[1], r5
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    vstr d15, [sp, #16] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vldr s0, [sp, #28] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov r5, s26
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov s26, r4
-; BE-NEON-NEXT:    vmov r0, s0
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    vmov.32 d14[1], r10
-; BE-NEON-NEXT:    vmov r4, s24
-; BE-NEON-NEXT:    vstr d16, [sp] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d14, [sp, #8] @ 8-byte Spill
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov.f32 s0, s26
-; BE-NEON-NEXT:    vmov s22, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s22
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    vmov s24, r6
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    mov r0, r4
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov.f32 s0, s24
-; BE-NEON-NEXT:    vmov s22, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s22
-; BE-NEON-NEXT:    mov r9, r1
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    vmov.32 d14[1], r6
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    mov r0, r5
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vldr s0, [sp, #52] @ 4-byte Reload
-; BE-NEON-NEXT:    mov r4, r0
-; BE-NEON-NEXT:    vmov.32 d13[1], r7
-; BE-NEON-NEXT:    vmov r0, s0
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vldr s0, [sp, #68] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov s20, r0
-; BE-NEON-NEXT:    vmov.32 d11[1], r6
-; BE-NEON-NEXT:    vmov r7, s0
-; BE-NEON-NEXT:    vldr s0, [sp, #72] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov r0, s0
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov.f32 s0, s20
-; BE-NEON-NEXT:    vmov s16, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    vmov s18, r4
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    mov r0, r7
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov.f32 s0, s18
-; BE-NEON-NEXT:    vmov s16, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    vmov.32 d15[1], r4
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d24[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
-; BE-NEON-NEXT:    vldr d23, [sp, #56] @ 8-byte Reload
-; BE-NEON-NEXT:    vldr d20, [sp, #8] @ 8-byte Reload
-; BE-NEON-NEXT:    vmov.32 d23[1], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #92] @ 4-byte Reload
-; BE-NEON-NEXT:    vldr d22, [sp, #80] @ 8-byte Reload
-; BE-NEON-NEXT:    vldr d26, [sp, #16] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d21, d20
-; BE-NEON-NEXT:    vmov.32 d22[1], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
-; BE-NEON-NEXT:    vldr d30, [sp] @ 8-byte Reload
-; BE-NEON-NEXT:    vldr d25, [sp, #96] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d20, d26
-; BE-NEON-NEXT:    vldr d26, [sp, #32] @ 8-byte Reload
-; BE-NEON-NEXT:    vmov.32 d10[1], r5
-; BE-NEON-NEXT:    vmov.32 d12[1], r9
-; BE-NEON-NEXT:    vldr d28, [sp, #40] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d27, d26
-; BE-NEON-NEXT:    vmov.32 d25[1], r0
-; BE-NEON-NEXT:    add r0, r11, #64
-; BE-NEON-NEXT:    vmov.32 d30[1], r8
-; BE-NEON-NEXT:    vmov.32 d9[1], r6
-; BE-NEON-NEXT:    vrev64.32 d26, d28
-; BE-NEON-NEXT:    vrev64.32 d29, d10
-; BE-NEON-NEXT:    vmov.32 d24[1], r1
-; BE-NEON-NEXT:    vrev64.32 d1, d12
-; BE-NEON-NEXT:    vrev64.32 d28, d23
-; BE-NEON-NEXT:    vrev64.32 d23, d22
-; BE-NEON-NEXT:    vrev64.32 d22, d30
-; BE-NEON-NEXT:    vrev64.32 d31, d25
-; BE-NEON-NEXT:    vrev64.32 d0, d9
-; BE-NEON-NEXT:    vrev64.32 d30, d24
-; BE-NEON-NEXT:    vst1.64 {d0, d1}, [r0:128]!
-; BE-NEON-NEXT:    vst1.64 {d30, d31}, [r0:128]!
-; BE-NEON-NEXT:    vst1.64 {d28, d29}, [r0:128]!
-; BE-NEON-NEXT:    vrev64.32 d19, d13
-; BE-NEON-NEXT:    vst1.64 {d26, d27}, [r0:128]
-; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r11:128]!
-; BE-NEON-NEXT:    vrev64.32 d18, d14
-; BE-NEON-NEXT:    vst1.64 {d22, d23}, [r11:128]!
-; BE-NEON-NEXT:    vrev64.32 d17, d15
-; BE-NEON-NEXT:    vrev64.32 d16, d11
-; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r11:128]!
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]
-; BE-NEON-NEXT:    add sp, sp, #112
-; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    add sp, sp, #4
-; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
   %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x)
   ret <16 x i64> %a
 }
 declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>)
 
-define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) {
-; LE-LABEL: llrint_v32i64_v32f16:
+define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
+; LE-LABEL: llrint_v1i64_v1f32:
 ; LE:       @ %bb.0:
-; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEXT:    .pad #4
-; LE-NEXT:    sub sp, sp, #4
-; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    .pad #248
-; LE-NEXT:    sub sp, sp, #248
-; LE-NEXT:    add lr, sp, #256
-; LE-NEXT:    mov r11, r0
-; LE-NEXT:    vstr s15, [sp, #176] @ 4-byte Spill
-; LE-NEXT:    vmov.f32 s19, s14
-; LE-NEXT:    ldrh r0, [lr, #132]
-; LE-NEXT:    vmov.f32 s17, s11
-; LE-NEXT:    vstr s13, [sp, #196] @ 4-byte Spill
-; LE-NEXT:    vstr s12, [sp, #112] @ 4-byte Spill
-; LE-NEXT:    vstr s10, [sp, #136] @ 4-byte Spill
-; LE-NEXT:    vstr s9, [sp, #160] @ 4-byte Spill
-; LE-NEXT:    vstr s8, [sp, #200] @ 4-byte Spill
-; LE-NEXT:    vstr s7, [sp, #100] @ 4-byte Spill
-; LE-NEXT:    vstr s6, [sp, #116] @ 4-byte Spill
-; LE-NEXT:    vstr s5, [sp, #76] @ 4-byte Spill
-; LE-NEXT:    vstr s4, [sp, #120] @ 4-byte Spill
-; LE-NEXT:    vstr s3, [sp, #156] @ 4-byte Spill
-; LE-NEXT:    vstr s2, [sp, #192] @ 4-byte Spill
-; LE-NEXT:    vstr s1, [sp, #104] @ 4-byte Spill
-; LE-NEXT:    vstr s0, [sp, #108] @ 4-byte Spill
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vmov s0, r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #256
-; LE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
-; LE-NEXT:    str r1, [sp, #56] @ 4-byte Spill
-; LE-NEXT:    ldrh r0, [lr, #108]
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    add lr, sp, #256
-; LE-NEXT:    str r0, [sp, #32] @ 4-byte Spill
-; LE-NEXT:    ldrh r0, [lr, #96]
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    add lr, sp, #256
-; LE-NEXT:    mov r5, r0
-; LE-NEXT:    ldrh r0, [lr, #100]
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    add lr, sp, #256
-; LE-NEXT:    mov r7, r0
-; LE-NEXT:    ldrh r0, [lr, #156]
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    add lr, sp, #256
-; LE-NEXT:    mov r6, r0
-; LE-NEXT:    ldrh r0, [lr, #152]
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    add lr, sp, #256
-; LE-NEXT:    mov r4, r0
-; LE-NEXT:    ldrh r0, [lr, #148]
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    .save {r11, lr}
+; LE-NEXT:    push {r11, lr}
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov s0, r4
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vmov.32 d0[0], r0
+; LE-NEXT:    vmov.32 d0[1], r1
+; LE-NEXT:    pop {r11, pc}
+;
+; BE-LABEL: llrint_v1i64_v1f32:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r11, lr}
+; BE-NEXT:    push {r11, lr}
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 d0, d16
+; BE-NEXT:    pop {r11, pc}
+  %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
+  ret <1 x i64> %a
+}
+declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
+
+define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
+; LE-LABEL: llrint_v2i64_v2f32:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, lr}
+; LE-NEXT:    push {r4, lr}
+; LE-NEXT:    .vsave {d10, d11}
+; LE-NEXT:    vpush {d10, d11}
+; LE-NEXT:    .vsave {d8}
+; LE-NEXT:    vpush {d8}
+; LE-NEXT:    vmov.f64 d8, d0
+; LE-NEXT:    vmov.f32 s0, s17
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov s0, r6
+; LE-NEXT:    vmov.f32 s0, s16
 ; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov s0, r7
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov s0, r5
-; LE-NEXT:    mov r7, r1
 ; LE-NEXT:    vmov.32 d11[0], r0
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #256
 ; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    ldrh r0, [lr, #144]
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    add lr, sp, #256
-; LE-NEXT:    mov r10, r0
-; LE-NEXT:    vmov.32 d11[1], r7
-; LE-NEXT:    ldrh r0, [lr, #104]
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vmov.32 d10[1], r5
-; LE-NEXT:    add lr, sp, #80
-; LE-NEXT:    mov r7, r0
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #256
-; LE-NEXT:    ldrh r0, [lr, #124]
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    add lr, sp, #256
-; LE-NEXT:    mov r5, r0
-; LE-NEXT:    vmov.32 d15[1], r6
-; LE-NEXT:    ldrh r0, [lr, #120]
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vmov.32 d14[1], r4
-; LE-NEXT:    add lr, sp, #16
-; LE-NEXT:    mov r6, r0
-; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #256
-; LE-NEXT:    ldrh r0, [lr, #116]
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    add lr, sp, #256
-; LE-NEXT:    vorr q5, q6, q6
-; LE-NEXT:    mov r4, r0
-; LE-NEXT:    ldrh r0, [lr, #112]
-; LE-NEXT:    vmov.32 d11[1], r8
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vmov s0, r0
+; LE-NEXT:    vmov.32 d11[1], r4
+; LE-NEXT:    vmov.32 d10[1], r1
+; LE-NEXT:    vorr q0, q5, q5
+; LE-NEXT:    vpop {d8}
+; LE-NEXT:    vpop {d10, d11}
+; LE-NEXT:    pop {r4, pc}
+;
+; BE-LABEL: llrint_v2i64_v2f32:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, lr}
+; BE-NEXT:    push {r4, lr}
+; BE-NEXT:    .vsave {d10, d11}
+; BE-NEXT:    vpush {d10, d11}
+; BE-NEXT:    .vsave {d8}
+; BE-NEXT:    vpush {d8}
+; BE-NEXT:    vrev64.32 d8, d0
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    vmov.32 d10[1], r1
+; BE-NEXT:    vrev64.32 q0, q5
+; BE-NEXT:    vpop {d8}
+; BE-NEXT:    vpop {d10, d11}
+; BE-NEXT:    pop {r4, pc}
+  %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
+  ret <2 x i64> %a
+}
+declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
+
+define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
+; LE-LABEL: llrint_v4i64_v4f32:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, lr}
+; LE-NEXT:    push {r4, r5, r6, lr}
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; LE-NEXT:    vorr q5, q0, q0
+; LE-NEXT:    vmov.f32 s0, s23
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov s0, r4
-; LE-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; LE-NEXT:    vmov.f32 s0, s20
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s21
+; LE-NEXT:    mov r5, r1
 ; LE-NEXT:    vmov.32 d12[0], r0
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov s0, r6
-; LE-NEXT:    add lr, sp, #216
+; LE-NEXT:    vmov.f32 s0, s22
+; LE-NEXT:    mov r6, r1
 ; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov s0, r5
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    vmov.32 d13[1], r6
+; LE-NEXT:    vmov.32 d9[1], r4
+; LE-NEXT:    vmov.32 d12[1], r5
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vorr q0, q6, q6
+; LE-NEXT:    vorr q1, q4, q4
+; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; LE-NEXT:    pop {r4, r5, r6, pc}
+;
+; BE-LABEL: llrint_v4i64_v4f32:
+; BE:       @ %bb.0:
+; BE-NEXT:    .save {r4, r5, r6, lr}
+; BE-NEXT:    push {r4, r5, r6, lr}
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
+; BE-NEXT:    vrev64.32 d8, d1
+; BE-NEXT:    vrev64.32 d9, d0
+; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s18
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s19
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    vmov.32 d13[1], r6
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    vmov.32 d12[1], r5
+; BE-NEXT:    vmov.32 d10[1], r1
+; BE-NEXT:    vrev64.32 q0, q6
+; BE-NEXT:    vrev64.32 q1, q5
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
+; BE-NEXT:    pop {r4, r5, r6, pc}
+  %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
+  ret <4 x i64> %a
+}
+declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
+
+define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
+; LE-LABEL: llrint_v8i64_v8f32:
+; LE:       @ %bb.0:
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-NEXT:    .pad #40
+; LE-NEXT:    sub sp, sp, #40
+; LE-NEXT:    vorr q6, q1, q1
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    vorr q7, q0, q0
+; LE-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
+; LE-NEXT:    vmov.f32 s0, s27
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov s0, r7
-; LE-NEXT:    add lr, sp, #232
-; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vmov.f32 s0, s24
 ; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    vmov.32 d9[0], r0
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov s0, r10
+; LE-NEXT:    vmov.f32 s0, s25
 ; LE-NEXT:    mov r9, r1
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #256
 ; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    ldrh r0, [lr, #140]
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vmov s0, r0
-; LE-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
-; LE-NEXT:    vmov.32 d10[1], r5
-; LE-NEXT:    add lr, sp, #32
-; LE-NEXT:    vmov s16, r0
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #256
-; LE-NEXT:    mov r6, r1
+; LE-NEXT:    vorr q6, q7, q7
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    vmov.f32 s0, s26
+; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s27
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    vmov.f32 s0, s24
+; LE-NEXT:    mov r4, r1
 ; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    ldrh r1, [lr, #128]
-; LE-NEXT:    mov r0, r1
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vmov.f32 s0, s16
-; LE-NEXT:    vmov s18, r0
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #256
+; LE-NEXT:    add lr, sp, #8
 ; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
-; LE-NEXT:    ldrh r0, [lr, #136]
-; LE-NEXT:    vmov.32 d15[1], r6
-; LE-NEXT:    vmov.32 d11[0], r1
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vmov.f32 s0, s18
-; LE-NEXT:    vmov s16, r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s16
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vmov.32 d11[1], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.32 d13[1], r5
-; LE-NEXT:    add lr, sp, #56
-; LE-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; LE-NEXT:    vmov.32 d12[1], r9
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    vmov r0, s19
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #232
-; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d13[1], r8
-; LE-NEXT:    vmov.32 d12[1], r4
-; LE-NEXT:    vmov.32 d10[1], r6
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #216
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d17[1], r2
-; LE-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; LE-NEXT:    vmov.32 d14[1], r1
-; LE-NEXT:    add r1, r11, #192
-; LE-NEXT:    vmov.32 d16[1], r2
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #32
-; LE-NEXT:    vst1.64 {d10, d11}, [r1:128]!
-; LE-NEXT:    vst1.64 {d14, d15}, [r1:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #16
-; LE-NEXT:    vst1.64 {d16, d17}, [r1:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vst1.64 {d16, d17}, [r1:128]
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    str r0, [sp, #52] @ 4-byte Spill
-; LE-NEXT:    vmov r0, s17
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vldr s0, [sp, #76] @ 4-byte Reload
-; LE-NEXT:    mov r10, r0
-; LE-NEXT:    vmov r0, s0
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vldr s0, [sp, #100] @ 4-byte Reload
-; LE-NEXT:    mov r4, r0
-; LE-NEXT:    vmov r0, s0
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vldr s0, [sp, #104] @ 4-byte Reload
-; LE-NEXT:    mov r7, r0
-; LE-NEXT:    vmov r0, s0
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vldr s0, [sp, #108] @ 4-byte Reload
-; LE-NEXT:    mov r5, r0
-; LE-NEXT:    vmov r0, s0
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vldr s0, [sp, #112] @ 4-byte Reload
-; LE-NEXT:    mov r6, r0
-; LE-NEXT:    vmov r0, s0
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vmov s0, r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov s0, r6
-; LE-NEXT:    str r1, [sp, #112] @ 4-byte Spill
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov s0, r5
-; LE-NEXT:    mov r9, r1
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov s0, r7
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov s0, r4
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov s0, r10
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vldr s0, [sp, #116] @ 4-byte Reload
-; LE-NEXT:    mov r6, r0
-; LE-NEXT:    str r1, [sp, #108] @ 4-byte Spill
-; LE-NEXT:    vmov.32 d11[1], r5
-; LE-NEXT:    vmov r0, s0
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vmov s0, r0
-; LE-NEXT:    vmov.32 d13[1], r4
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vldr s0, [sp, #120] @ 4-byte Reload
-; LE-NEXT:    mov r4, r0
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d9[1], r8
-; LE-NEXT:    vmov r0, s0
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vldr s0, [sp, #136] @ 4-byte Reload
-; LE-NEXT:    vmov.32 d10[0], r4
-; LE-NEXT:    vmov r7, s0
-; LE-NEXT:    vmov s0, r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    add lr, sp, #136
-; LE-NEXT:    add r10, r11, #128
-; LE-NEXT:    mov r0, r7
-; LE-NEXT:    vmov.32 d10[1], r5
-; LE-NEXT:    vmov.32 d12[1], r1
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #120
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #80
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #56
-; LE-NEXT:    vmov.32 d13[0], r6
-; LE-NEXT:    vst1.64 {d16, d17}, [r10:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vst1.64 {d16, d17}, [r10:128]!
-; LE-NEXT:    vldr s0, [sp, #156] @ 4-byte Reload
-; LE-NEXT:    vmov r4, s0
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vldr s0, [sp, #160] @ 4-byte Reload
-; LE-NEXT:    mov r5, r0
-; LE-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
-; LE-NEXT:    vmov.32 d8[1], r9
-; LE-NEXT:    vmov r7, s0
-; LE-NEXT:    add lr, sp, #160
-; LE-NEXT:    vldr s0, [sp, #176] @ 4-byte Reload
-; LE-NEXT:    vmov s20, r0
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEXT:    vmov r0, s0
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vmov.f32 s0, s20
-; LE-NEXT:    vmov s18, r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s18
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    vmov s16, r5
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    mov r0, r7
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vmov.f32 s0, s16
-; LE-NEXT:    vmov s18, r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s18
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    add lr, sp, #176
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vmov.32 d11[1], r5
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    mov r0, r4
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vldr s0, [sp, #196] @ 4-byte Reload
-; LE-NEXT:    mov r7, r0
-; LE-NEXT:    vmov.32 d10[1], r6
-; LE-NEXT:    vmov r0, s0
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vldr s0, [sp, #192] @ 4-byte Reload
-; LE-NEXT:    vmov s16, r0
-; LE-NEXT:    vmov.32 d13[1], r5
-; LE-NEXT:    vmov r6, s0
-; LE-NEXT:    vldr s0, [sp, #200] @ 4-byte Reload
-; LE-NEXT:    vmov r0, s0
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vmov.f32 s0, s16
-; LE-NEXT:    vmov s18, r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s18
-; LE-NEXT:    add lr, sp, #200
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov s16, r7
-; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    mov r0, r6
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    bl __aeabi_h2f
-; LE-NEXT:    vmov.f32 s0, s16
-; LE-NEXT:    vmov s18, r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s18
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    vmov.32 d12[1], r5
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #200
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
-; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #176
-; LE-NEXT:    vmov.32 d19[1], r4
-; LE-NEXT:    vmov.32 d18[1], r0
-; LE-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #216
-; LE-NEXT:    vmov.32 d17[1], r0
-; LE-NEXT:    add r0, r11, #64
-; LE-NEXT:    vmov.32 d16[1], r8
-; LE-NEXT:    vorr q10, q8, q8
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #232
-; LE-NEXT:    vmov.32 d15[1], r6
-; LE-NEXT:    vst1.64 {d16, d17}, [r10:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #160
-; LE-NEXT:    vmov.32 d14[1], r1
-; LE-NEXT:    vst1.64 {d16, d17}, [r10:128]
-; LE-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-NEXT:    vst1.64 {d20, d21}, [r0:128]!
-; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-NEXT:    vst1.64 {d10, d11}, [r0:128]
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #120
-; LE-NEXT:    vst1.64 {d16, d17}, [r11:128]!
-; LE-NEXT:    vst1.64 {d14, d15}, [r11:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #136
-; LE-NEXT:    vst1.64 {d16, d17}, [r11:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vst1.64 {d16, d17}, [r11:128]
-; LE-NEXT:    add sp, sp, #248
-; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    add sp, sp, #4
-; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; LE-NEON-LABEL: llrint_v32i64_v32f16:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    .pad #4
-; LE-NEON-NEXT:    sub sp, sp, #4
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    .pad #248
-; LE-NEON-NEXT:    sub sp, sp, #248
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    mov r11, r0
-; LE-NEON-NEXT:    vstr s15, [sp, #176] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.f32 s19, s14
-; LE-NEON-NEXT:    ldrh r0, [lr, #132]
-; LE-NEON-NEXT:    vmov.f32 s17, s11
-; LE-NEON-NEXT:    vstr s13, [sp, #196] @ 4-byte Spill
-; LE-NEON-NEXT:    vstr s12, [sp, #112] @ 4-byte Spill
-; LE-NEON-NEXT:    vstr s10, [sp, #136] @ 4-byte Spill
-; LE-NEON-NEXT:    vstr s9, [sp, #160] @ 4-byte Spill
-; LE-NEON-NEXT:    vstr s8, [sp, #200] @ 4-byte Spill
-; LE-NEON-NEXT:    vstr s7, [sp, #100] @ 4-byte Spill
-; LE-NEON-NEXT:    vstr s6, [sp, #116] @ 4-byte Spill
-; LE-NEON-NEXT:    vstr s5, [sp, #76] @ 4-byte Spill
-; LE-NEON-NEXT:    vstr s4, [sp, #120] @ 4-byte Spill
-; LE-NEON-NEXT:    vstr s3, [sp, #156] @ 4-byte Spill
-; LE-NEON-NEXT:    vstr s2, [sp, #192] @ 4-byte Spill
-; LE-NEON-NEXT:    vstr s1, [sp, #104] @ 4-byte Spill
-; LE-NEON-NEXT:    vstr s0, [sp, #108] @ 4-byte Spill
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    str r0, [sp, #52] @ 4-byte Spill
-; LE-NEON-NEXT:    str r1, [sp, #56] @ 4-byte Spill
-; LE-NEON-NEXT:    ldrh r0, [lr, #108]
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    str r0, [sp, #32] @ 4-byte Spill
-; LE-NEON-NEXT:    ldrh r0, [lr, #96]
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    mov r5, r0
-; LE-NEON-NEXT:    ldrh r0, [lr, #100]
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    mov r7, r0
-; LE-NEON-NEXT:    ldrh r0, [lr, #156]
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    mov r6, r0
-; LE-NEON-NEXT:    ldrh r0, [lr, #152]
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    mov r4, r0
-; LE-NEON-NEXT:    ldrh r0, [lr, #148]
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r4
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r6
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r7
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r5
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    ldrh r0, [lr, #144]
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    mov r10, r0
-; LE-NEON-NEXT:    vmov.32 d11[1], r7
-; LE-NEON-NEXT:    ldrh r0, [lr, #104]
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov.32 d10[1], r5
-; LE-NEON-NEXT:    add lr, sp, #80
-; LE-NEON-NEXT:    mov r7, r0
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    ldrh r0, [lr, #124]
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    mov r5, r0
-; LE-NEON-NEXT:    vmov.32 d15[1], r6
-; LE-NEON-NEXT:    ldrh r0, [lr, #120]
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov.32 d14[1], r4
-; LE-NEON-NEXT:    add lr, sp, #16
-; LE-NEON-NEXT:    mov r6, r0
-; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    ldrh r0, [lr, #116]
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    vorr q5, q6, q6
-; LE-NEON-NEXT:    mov r4, r0
-; LE-NEON-NEXT:    ldrh r0, [lr, #112]
-; LE-NEON-NEXT:    vmov.32 d11[1], r8
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r4
-; LE-NEON-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r6
-; LE-NEON-NEXT:    add lr, sp, #216
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r5
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r7
-; LE-NEON-NEXT:    add lr, sp, #232
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r10
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    ldrh r0, [lr, #140]
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov.32 d10[1], r5
-; LE-NEON-NEXT:    add lr, sp, #32
-; LE-NEON-NEXT:    vmov s16, r0
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    ldrh r1, [lr, #128]
-; LE-NEON-NEXT:    mov r0, r1
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov.f32 s0, s16
-; LE-NEON-NEXT:    vmov s18, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #256
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    ldr r1, [sp, #52] @ 4-byte Reload
-; LE-NEON-NEXT:    ldrh r0, [lr, #136]
-; LE-NEON-NEXT:    vmov.32 d15[1], r6
-; LE-NEON-NEXT:    vmov.32 d11[0], r1
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov.f32 s0, s18
-; LE-NEON-NEXT:    vmov s16, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s16
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d13[1], r5
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov.32 d12[1], r9
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    vmov r0, s19
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #232
-; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d13[1], r8
-; LE-NEON-NEXT:    vmov.32 d12[1], r4
-; LE-NEON-NEXT:    vmov.32 d10[1], r6
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #216
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d17[1], r2
-; LE-NEON-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov.32 d14[1], r1
-; LE-NEON-NEXT:    add r1, r11, #192
-; LE-NEON-NEXT:    vmov.32 d16[1], r2
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #32
-; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r1:128]!
-; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r1:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #16
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r1:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r1:128]
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    str r0, [sp, #52] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov r0, s17
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vldr s0, [sp, #76] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r10, r0
-; LE-NEON-NEXT:    vmov r0, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vldr s0, [sp, #100] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r4, r0
-; LE-NEON-NEXT:    vmov r0, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vldr s0, [sp, #104] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r7, r0
-; LE-NEON-NEXT:    vmov r0, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vldr s0, [sp, #108] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r5, r0
-; LE-NEON-NEXT:    vmov r0, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vldr s0, [sp, #112] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r6, r0
-; LE-NEON-NEXT:    vmov r0, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r6
-; LE-NEON-NEXT:    str r1, [sp, #112] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r5
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r7
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r4
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov s0, r10
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vldr s0, [sp, #116] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r6, r0
-; LE-NEON-NEXT:    str r1, [sp, #108] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.32 d11[1], r5
-; LE-NEON-NEXT:    vmov r0, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    vmov.32 d13[1], r4
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vldr s0, [sp, #120] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r4, r0
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d9[1], r8
-; LE-NEON-NEXT:    vmov r0, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vldr s0, [sp, #136] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov.32 d10[0], r4
-; LE-NEON-NEXT:    vmov r7, s0
-; LE-NEON-NEXT:    vmov s0, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    add lr, sp, #136
-; LE-NEON-NEXT:    add r10, r11, #128
-; LE-NEON-NEXT:    mov r0, r7
-; LE-NEON-NEXT:    vmov.32 d10[1], r5
-; LE-NEON-NEXT:    vmov.32 d12[1], r1
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #120
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #80
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    vmov.32 d13[0], r6
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]!
-; LE-NEON-NEXT:    vldr s0, [sp, #156] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov r4, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vldr s0, [sp, #160] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r5, r0
-; LE-NEON-NEXT:    ldr r0, [sp, #52] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov.32 d8[1], r9
-; LE-NEON-NEXT:    vmov r7, s0
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    vldr s0, [sp, #176] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov s20, r0
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    vmov r0, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov.f32 s0, s20
-; LE-NEON-NEXT:    vmov s18, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s18
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    vmov s16, r5
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    mov r0, r7
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov.f32 s0, s16
-; LE-NEON-NEXT:    vmov s18, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s18
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vmov.32 d11[1], r5
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    mov r0, r4
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vldr s0, [sp, #196] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r7, r0
-; LE-NEON-NEXT:    vmov.32 d10[1], r6
-; LE-NEON-NEXT:    vmov r0, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vldr s0, [sp, #192] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov s16, r0
-; LE-NEON-NEXT:    vmov.32 d13[1], r5
-; LE-NEON-NEXT:    vmov r6, s0
-; LE-NEON-NEXT:    vldr s0, [sp, #200] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov r0, s0
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov.f32 s0, s16
-; LE-NEON-NEXT:    vmov s18, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s18
-; LE-NEON-NEXT:    add lr, sp, #200
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov s16, r7
-; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    mov r0, r6
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    bl __aeabi_h2f
-; LE-NEON-NEXT:    vmov.f32 s0, s16
-; LE-NEON-NEXT:    vmov s18, r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s18
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    vmov.32 d12[1], r5
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #200
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
-; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    vmov.32 d19[1], r4
-; LE-NEON-NEXT:    vmov.32 d18[1], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #216
-; LE-NEON-NEXT:    vmov.32 d17[1], r0
-; LE-NEON-NEXT:    add r0, r11, #64
-; LE-NEON-NEXT:    vmov.32 d16[1], r8
-; LE-NEON-NEXT:    vorr q10, q8, q8
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #232
-; LE-NEON-NEXT:    vmov.32 d15[1], r6
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    vmov.32 d14[1], r1
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]
-; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d20, d21}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #120
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
-; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r11:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #136
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]
-; LE-NEON-NEXT:    add sp, sp, #248
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    add sp, sp, #4
-; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-LABEL: llrint_v32i64_v32f16:
-; BE:       @ %bb.0:
-; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEXT:    .pad #4
-; BE-NEXT:    sub sp, sp, #4
-; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    .pad #176
-; BE-NEXT:    sub sp, sp, #176
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    mov r10, r0
-; BE-NEXT:    vstr s15, [sp, #112] @ 4-byte Spill
-; BE-NEXT:    ldrh r0, [lr, #74]
-; BE-NEXT:    vstr s14, [sp, #80] @ 4-byte Spill
-; BE-NEXT:    vstr s13, [sp, #48] @ 4-byte Spill
-; BE-NEXT:    vstr s12, [sp, #148] @ 4-byte Spill
-; BE-NEXT:    vstr s11, [sp, #76] @ 4-byte Spill
-; BE-NEXT:    vstr s10, [sp, #152] @ 4-byte Spill
-; BE-NEXT:    vstr s9, [sp, #156] @ 4-byte Spill
-; BE-NEXT:    vstr s8, [sp, #120] @ 4-byte Spill
-; BE-NEXT:    vstr s7, [sp, #136] @ 4-byte Spill
-; BE-NEXT:    vstr s6, [sp, #132] @ 4-byte Spill
-; BE-NEXT:    vstr s5, [sp, #144] @ 4-byte Spill
-; BE-NEXT:    vstr s4, [sp, #64] @ 4-byte Spill
-; BE-NEXT:    vstr s3, [sp, #104] @ 4-byte Spill
-; BE-NEXT:    vstr s2, [sp, #88] @ 4-byte Spill
-; BE-NEXT:    vstr s1, [sp, #56] @ 4-byte Spill
-; BE-NEXT:    vstr s0, [sp, #96] @ 4-byte Spill
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov s0, r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    mov r9, r0
-; BE-NEXT:    mov r8, r1
-; BE-NEXT:    ldrh r0, [lr, #62]
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    mov r6, r0
-; BE-NEXT:    ldrh r0, [lr, #58]
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    mov r7, r0
-; BE-NEXT:    ldrh r0, [lr, #66]
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    mov r4, r0
-; BE-NEXT:    ldrh r0, [lr, #54]
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    mov r5, r0
-; BE-NEXT:    ldrh r0, [lr, #50]
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov s0, r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    vmov s0, r5
-; BE-NEXT:    str r1, [sp, #44] @ 4-byte Spill
-; BE-NEXT:    vstr d16, [sp, #168] @ 8-byte Spill
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    vmov s0, r4
-; BE-NEXT:    str r1, [sp, #40] @ 4-byte Spill
-; BE-NEXT:    vstr d16, [sp, #160] @ 8-byte Spill
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    vmov s0, r7
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vstr d16, [sp, #32] @ 8-byte Spill
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    vmov s0, r6
-; BE-NEXT:    mov r11, r1
-; BE-NEXT:    vstr d16, [sp, #24] @ 8-byte Spill
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    ldrh r0, [lr, #34]
-; BE-NEXT:    vstr d16, [sp, #16] @ 8-byte Spill
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov s0, r0
-; BE-NEXT:    vmov.32 d8[0], r9
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    ldrh r1, [lr, #38]
-; BE-NEXT:    mov r0, r1
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov.32 d8[1], r8
-; BE-NEXT:    vmov s0, r0
-; BE-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    ldrh r1, [lr, #26]
-; BE-NEXT:    mov r0, r1
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov s0, r0
-; BE-NEXT:    vmov.32 d12[1], r7
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d9[0], r0
-; BE-NEXT:    ldrh r1, [lr, #30]
-; BE-NEXT:    mov r0, r1
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov s0, r0
-; BE-NEXT:    vmov.32 d13[1], r5
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    ldrh r1, [lr, #78]
-; BE-NEXT:    mov r0, r1
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov s0, r0
-; BE-NEXT:    vmov.32 d9[1], r7
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    ldrh r1, [lr, #82]
-; BE-NEXT:    mov r0, r1
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov s0, r0
-; BE-NEXT:    vmov.32 d15[1], r5
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    ldrh r1, [lr, #86]
-; BE-NEXT:    mov r0, r1
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov s0, r0
-; BE-NEXT:    vmov.32 d14[1], r7
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    ldrh r1, [lr, #70]
-; BE-NEXT:    mov r0, r1
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov s0, r0
-; BE-NEXT:    vmov.32 d8[1], r5
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    ldrh r1, [lr, #46]
-; BE-NEXT:    mov r0, r1
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov s0, r0
-; BE-NEXT:    vmov.32 d10[1], r7
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d25[0], r0
-; BE-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
-; BE-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
-; BE-NEXT:    vldr d24, [sp, #160] @ 8-byte Reload
-; BE-NEXT:    vldr s0, [sp, #48] @ 4-byte Reload
-; BE-NEXT:    vmov.32 d24[1], r0
-; BE-NEXT:    vmov r0, s0
-; BE-NEXT:    vldr d26, [sp, #16] @ 8-byte Reload
-; BE-NEXT:    vstr d24, [sp, #160] @ 8-byte Spill
-; BE-NEXT:    vldr d24, [sp, #8] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d23, d14
-; BE-NEXT:    vldr d29, [sp, #24] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d22, d24
-; BE-NEXT:    vldr d24, [sp, #168] @ 8-byte Reload
-; BE-NEXT:    vmov.32 d26[1], r6
-; BE-NEXT:    vldr d28, [sp, #32] @ 8-byte Reload
-; BE-NEXT:    vmov.32 d25[1], r1
-; BE-NEXT:    add r1, r10, #192
-; BE-NEXT:    vmov.32 d29[1], r11
-; BE-NEXT:    add r11, r10, #128
-; BE-NEXT:    vmov.32 d24[1], r2
-; BE-NEXT:    vmov.32 d11[1], r5
-; BE-NEXT:    vmov.32 d28[1], r4
-; BE-NEXT:    vrev64.32 d27, d26
-; BE-NEXT:    vstr d24, [sp, #168] @ 8-byte Spill
-; BE-NEXT:    vstr d25, [sp, #48] @ 8-byte Spill
-; BE-NEXT:    vrev64.32 d25, d11
-; BE-NEXT:    vrev64.32 d26, d29
-; BE-NEXT:    vrev64.32 d24, d28
-; BE-NEXT:    vst1.64 {d26, d27}, [r1:128]!
-; BE-NEXT:    vst1.64 {d24, d25}, [r1:128]!
-; BE-NEXT:    vrev64.32 d21, d10
-; BE-NEXT:    vrev64.32 d19, d15
-; BE-NEXT:    vrev64.32 d17, d13
-; BE-NEXT:    vrev64.32 d20, d8
-; BE-NEXT:    vst1.64 {d22, d23}, [r1:128]!
-; BE-NEXT:    vrev64.32 d18, d9
-; BE-NEXT:    vrev64.32 d16, d12
-; BE-NEXT:    vst1.64 {d20, d21}, [r1:128]
-; BE-NEXT:    vst1.64 {d18, d19}, [r11:128]!
-; BE-NEXT:    vst1.64 {d16, d17}, [r11:128]!
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov s0, r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #256
-; BE-NEXT:    mov r7, r0
-; BE-NEXT:    mov r8, r1
-; BE-NEXT:    ldrh r0, [lr, #42]
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vldr s0, [sp, #56] @ 4-byte Reload
-; BE-NEXT:    mov r4, r0
-; BE-NEXT:    vmov r0, s0
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov s0, r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov s0, r4
-; BE-NEXT:    str r1, [sp, #44] @ 4-byte Spill
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vldr s0, [sp, #64] @ 4-byte Reload
-; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    vmov r2, s0
-; BE-NEXT:    vldr s0, [sp, #80] @ 4-byte Reload
-; BE-NEXT:    vmov.32 d16[1], r1
-; BE-NEXT:    vmov r4, s0
-; BE-NEXT:    vldr s0, [sp, #76] @ 4-byte Reload
-; BE-NEXT:    vstr d16, [sp, #80] @ 8-byte Spill
-; BE-NEXT:    vmov r5, s0
-; BE-NEXT:    mov r0, r2
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov s0, r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    mov r0, r4
-; BE-NEXT:    mov r9, r1
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov s0, r0
-; BE-NEXT:    vmov.32 d8[0], r7
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    mov r0, r5
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vldr s0, [sp, #88] @ 4-byte Reload
-; BE-NEXT:    mov r4, r0
-; BE-NEXT:    vmov.32 d8[1], r8
-; BE-NEXT:    vmov r7, s0
-; BE-NEXT:    vldr s0, [sp, #96] @ 4-byte Reload
-; BE-NEXT:    vstr d8, [sp, #88] @ 8-byte Spill
-; BE-NEXT:    vmov r0, s0
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vldr s0, [sp, #104] @ 4-byte Reload
-; BE-NEXT:    vmov s19, r0
-; BE-NEXT:    vmov.32 d12[1], r6
-; BE-NEXT:    vmov r5, s0
-; BE-NEXT:    vldr s0, [sp, #112] @ 4-byte Reload
-; BE-NEXT:    vstr d12, [sp, #104] @ 8-byte Spill
-; BE-NEXT:    vmov r0, s0
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov.f32 s0, s19
-; BE-NEXT:    vmov s30, r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s30
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vmov.32 d9[0], r0
-; BE-NEXT:    vmov s17, r4
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    mov r0, r5
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    vmov s30, r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s30
-; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    str r1, [sp, #76] @ 4-byte Spill
-; BE-NEXT:    vmov.32 d12[1], r4
-; BE-NEXT:    vstr d16, [sp, #64] @ 8-byte Spill
-; BE-NEXT:    vstr d12, [sp, #112] @ 8-byte Spill
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    mov r0, r7
-; BE-NEXT:    mov r8, r1
-; BE-NEXT:    vmov.32 d9[1], r6
-; BE-NEXT:    vstr d16, [sp, #56] @ 8-byte Spill
-; BE-NEXT:    vstr d9, [sp, #96] @ 8-byte Spill
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vldr s0, [sp, #120] @ 4-byte Reload
-; BE-NEXT:    mov r5, r0
-; BE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
-; BE-NEXT:    vmov r7, s0
-; BE-NEXT:    vldr s0, [sp, #132] @ 4-byte Reload
-; BE-NEXT:    vmov.32 d10[1], r0
-; BE-NEXT:    vmov r0, s0
-; BE-NEXT:    vstr d10, [sp, #120] @ 8-byte Spill
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vldr s0, [sp, #136] @ 4-byte Reload
-; BE-NEXT:    vmov s26, r0
-; BE-NEXT:    vmov.32 d11[1], r9
-; BE-NEXT:    vmov r4, s0
-; BE-NEXT:    vldr s0, [sp, #144] @ 4-byte Reload
-; BE-NEXT:    vstr d11, [sp, #136] @ 8-byte Spill
-; BE-NEXT:    vmov r0, s0
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov.f32 s0, s26
-; BE-NEXT:    vmov s22, r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s22
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    vmov s24, r5
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    mov r0, r4
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov.f32 s0, s24
-; BE-NEXT:    vmov s22, r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s22
-; BE-NEXT:    mov r9, r1
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    vmov.32 d14[1], r5
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    mov r0, r7
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vldr s0, [sp, #148] @ 4-byte Reload
-; BE-NEXT:    mov r7, r0
-; BE-NEXT:    vmov.32 d13[1], r6
-; BE-NEXT:    vmov r0, s0
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vldr s0, [sp, #152] @ 4-byte Reload
-; BE-NEXT:    vmov s20, r0
-; BE-NEXT:    vmov.32 d11[1], r5
-; BE-NEXT:    vmov r4, s0
-; BE-NEXT:    vldr s0, [sp, #156] @ 4-byte Reload
-; BE-NEXT:    vmov r0, s0
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov.f32 s0, s20
-; BE-NEXT:    vmov s16, r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    vmov s18, r7
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    mov r0, r4
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    bl __aeabi_h2f
-; BE-NEXT:    vmov.f32 s0, s18
-; BE-NEXT:    vmov s16, r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d9[0], r0
-; BE-NEXT:    vmov.32 d15[1], r5
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vldr d16, [sp, #160] @ 8-byte Reload
-; BE-NEXT:    vldr d20, [sp, #136] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d19, d14
-; BE-NEXT:    vrev64.32 d31, d16
-; BE-NEXT:    vldr d16, [sp, #168] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d18, d20
-; BE-NEXT:    vldr d20, [sp, #120] @ 8-byte Reload
-; BE-NEXT:    vldr d22, [sp, #96] @ 8-byte Reload
-; BE-NEXT:    vmov.32 d28[0], r0
-; BE-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
-; BE-NEXT:    vrev64.32 d21, d20
-; BE-NEXT:    vrev64.32 d30, d16
-; BE-NEXT:    vldr d16, [sp, #48] @ 8-byte Reload
-; BE-NEXT:    vldr d23, [sp, #64] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d20, d22
-; BE-NEXT:    vldr d22, [sp, #112] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d1, d16
-; BE-NEXT:    vldr d16, [sp, #80] @ 8-byte Reload
-; BE-NEXT:    vmov.32 d23[1], r0
-; BE-NEXT:    add r0, r10, #64
-; BE-NEXT:    vrev64.32 d25, d22
-; BE-NEXT:    vldr d22, [sp, #104] @ 8-byte Reload
-; BE-NEXT:    vmov.32 d9[1], r4
-; BE-NEXT:    vrev64.32 d0, d16
-; BE-NEXT:    vmov.32 d28[1], r1
-; BE-NEXT:    vldr d29, [sp, #56] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d3, d15
-; BE-NEXT:    vrev64.32 d24, d22
-; BE-NEXT:    vldr d22, [sp, #88] @ 8-byte Reload
-; BE-NEXT:    vmov.32 d10[1], r6
-; BE-NEXT:    vrev64.32 d5, d23
-; BE-NEXT:    vst1.64 {d0, d1}, [r11:128]!
-; BE-NEXT:    vrev64.32 d2, d9
-; BE-NEXT:    vrev64.32 d27, d22
-; BE-NEXT:    vmov.32 d29[1], r8
-; BE-NEXT:    vrev64.32 d4, d28
-; BE-NEXT:    vst1.64 {d30, d31}, [r11:128]
-; BE-NEXT:    vst1.64 {d2, d3}, [r0:128]!
-; BE-NEXT:    vmov.32 d12[1], r9
-; BE-NEXT:    vrev64.32 d26, d10
-; BE-NEXT:    vst1.64 {d4, d5}, [r0:128]!
-; BE-NEXT:    vrev64.32 d23, d29
-; BE-NEXT:    vst1.64 {d26, d27}, [r0:128]!
-; BE-NEXT:    vrev64.32 d22, d12
-; BE-NEXT:    vst1.64 {d24, d25}, [r0:128]
-; BE-NEXT:    vst1.64 {d20, d21}, [r10:128]!
-; BE-NEXT:    vst1.64 {d22, d23}, [r10:128]!
-; BE-NEXT:    vrev64.32 d17, d11
-; BE-NEXT:    vrev64.32 d16, d13
-; BE-NEXT:    vst1.64 {d18, d19}, [r10:128]!
-; BE-NEXT:    vst1.64 {d16, d17}, [r10:128]
-; BE-NEXT:    add sp, sp, #176
-; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    add sp, sp, #4
-; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-NEON-LABEL: llrint_v32i64_v32f16:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    .pad #4
-; BE-NEON-NEXT:    sub sp, sp, #4
-; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    .pad #176
-; BE-NEON-NEXT:    sub sp, sp, #176
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    mov r10, r0
-; BE-NEON-NEXT:    vstr s15, [sp, #112] @ 4-byte Spill
-; BE-NEON-NEXT:    ldrh r0, [lr, #74]
-; BE-NEON-NEXT:    vstr s14, [sp, #80] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr s13, [sp, #48] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr s12, [sp, #148] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr s11, [sp, #76] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr s10, [sp, #152] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr s9, [sp, #156] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr s8, [sp, #120] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr s7, [sp, #136] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr s6, [sp, #132] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr s5, [sp, #144] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr s4, [sp, #64] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr s3, [sp, #104] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr s2, [sp, #88] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr s1, [sp, #56] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr s0, [sp, #96] @ 4-byte Spill
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    mov r9, r0
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    ldrh r0, [lr, #62]
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    mov r6, r0
-; BE-NEON-NEXT:    ldrh r0, [lr, #58]
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    mov r7, r0
-; BE-NEON-NEXT:    ldrh r0, [lr, #66]
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    mov r4, r0
-; BE-NEON-NEXT:    ldrh r0, [lr, #54]
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    mov r5, r0
-; BE-NEON-NEXT:    ldrh r0, [lr, #50]
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov s0, r5
-; BE-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr d16, [sp, #168] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov s0, r4
-; BE-NEON-NEXT:    str r1, [sp, #40] @ 4-byte Spill
-; BE-NEON-NEXT:    vstr d16, [sp, #160] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov s0, r7
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vstr d16, [sp, #32] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov s0, r6
-; BE-NEON-NEXT:    mov r11, r1
-; BE-NEON-NEXT:    vstr d16, [sp, #24] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    ldrh r0, [lr, #34]
-; BE-NEON-NEXT:    vstr d16, [sp, #16] @ 8-byte Spill
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    vmov.32 d8[0], r9
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    ldrh r1, [lr, #38]
-; BE-NEON-NEXT:    mov r0, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov.32 d8[1], r8
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    ldrh r1, [lr, #26]
-; BE-NEON-NEXT:    mov r0, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    vmov.32 d12[1], r7
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    ldrh r1, [lr, #30]
-; BE-NEON-NEXT:    mov r0, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    vmov.32 d13[1], r5
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    ldrh r1, [lr, #78]
-; BE-NEON-NEXT:    mov r0, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    vmov.32 d9[1], r7
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    ldrh r1, [lr, #82]
-; BE-NEON-NEXT:    mov r0, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    vmov.32 d15[1], r5
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    ldrh r1, [lr, #86]
-; BE-NEON-NEXT:    mov r0, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    vmov.32 d14[1], r7
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    ldrh r1, [lr, #70]
-; BE-NEON-NEXT:    mov r0, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    vmov.32 d8[1], r5
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    ldrh r1, [lr, #46]
-; BE-NEON-NEXT:    mov r0, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    vmov.32 d10[1], r7
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d25[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
-; BE-NEON-NEXT:    ldr r2, [sp, #44] @ 4-byte Reload
-; BE-NEON-NEXT:    vldr d24, [sp, #160] @ 8-byte Reload
-; BE-NEON-NEXT:    vldr s0, [sp, #48] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov.32 d24[1], r0
-; BE-NEON-NEXT:    vmov r0, s0
-; BE-NEON-NEXT:    vldr d26, [sp, #16] @ 8-byte Reload
-; BE-NEON-NEXT:    vstr d24, [sp, #160] @ 8-byte Spill
-; BE-NEON-NEXT:    vldr d24, [sp, #8] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d23, d14
-; BE-NEON-NEXT:    vldr d29, [sp, #24] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d22, d24
-; BE-NEON-NEXT:    vldr d24, [sp, #168] @ 8-byte Reload
-; BE-NEON-NEXT:    vmov.32 d26[1], r6
-; BE-NEON-NEXT:    vldr d28, [sp, #32] @ 8-byte Reload
-; BE-NEON-NEXT:    vmov.32 d25[1], r1
-; BE-NEON-NEXT:    add r1, r10, #192
-; BE-NEON-NEXT:    vmov.32 d29[1], r11
-; BE-NEON-NEXT:    add r11, r10, #128
-; BE-NEON-NEXT:    vmov.32 d24[1], r2
-; BE-NEON-NEXT:    vmov.32 d11[1], r5
-; BE-NEON-NEXT:    vmov.32 d28[1], r4
-; BE-NEON-NEXT:    vrev64.32 d27, d26
-; BE-NEON-NEXT:    vstr d24, [sp, #168] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d25, [sp, #48] @ 8-byte Spill
-; BE-NEON-NEXT:    vrev64.32 d25, d11
-; BE-NEON-NEXT:    vrev64.32 d26, d29
-; BE-NEON-NEXT:    vrev64.32 d24, d28
-; BE-NEON-NEXT:    vst1.64 {d26, d27}, [r1:128]!
-; BE-NEON-NEXT:    vst1.64 {d24, d25}, [r1:128]!
-; BE-NEON-NEXT:    vrev64.32 d21, d10
-; BE-NEON-NEXT:    vrev64.32 d19, d15
-; BE-NEON-NEXT:    vrev64.32 d17, d13
-; BE-NEON-NEXT:    vrev64.32 d20, d8
-; BE-NEON-NEXT:    vst1.64 {d22, d23}, [r1:128]!
-; BE-NEON-NEXT:    vrev64.32 d18, d9
-; BE-NEON-NEXT:    vrev64.32 d16, d12
-; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r1:128]
-; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r11:128]!
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #256
-; BE-NEON-NEXT:    mov r7, r0
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    ldrh r0, [lr, #42]
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vldr s0, [sp, #56] @ 4-byte Reload
-; BE-NEON-NEXT:    mov r4, r0
-; BE-NEON-NEXT:    vmov r0, s0
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov s0, r4
-; BE-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vldr s0, [sp, #64] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov r2, s0
-; BE-NEON-NEXT:    vldr s0, [sp, #80] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-NEON-NEXT:    vmov r4, s0
-; BE-NEON-NEXT:    vldr s0, [sp, #76] @ 4-byte Reload
-; BE-NEON-NEXT:    vstr d16, [sp, #80] @ 8-byte Spill
-; BE-NEON-NEXT:    vmov r5, s0
-; BE-NEON-NEXT:    mov r0, r2
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    mov r0, r4
-; BE-NEON-NEXT:    mov r9, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov s0, r0
-; BE-NEON-NEXT:    vmov.32 d8[0], r7
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    mov r0, r5
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vldr s0, [sp, #88] @ 4-byte Reload
-; BE-NEON-NEXT:    mov r4, r0
-; BE-NEON-NEXT:    vmov.32 d8[1], r8
-; BE-NEON-NEXT:    vmov r7, s0
-; BE-NEON-NEXT:    vldr s0, [sp, #96] @ 4-byte Reload
-; BE-NEON-NEXT:    vstr d8, [sp, #88] @ 8-byte Spill
-; BE-NEON-NEXT:    vmov r0, s0
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vldr s0, [sp, #104] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov s19, r0
-; BE-NEON-NEXT:    vmov.32 d12[1], r6
-; BE-NEON-NEXT:    vmov r5, s0
-; BE-NEON-NEXT:    vldr s0, [sp, #112] @ 4-byte Reload
-; BE-NEON-NEXT:    vstr d12, [sp, #104] @ 8-byte Spill
-; BE-NEON-NEXT:    vmov r0, s0
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov.f32 s0, s19
-; BE-NEON-NEXT:    vmov s30, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s30
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    vmov s17, r4
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    mov r0, r5
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    vmov s30, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s30
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    str r1, [sp, #76] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.32 d12[1], r4
-; BE-NEON-NEXT:    vstr d16, [sp, #64] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d12, [sp, #112] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    mov r0, r7
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    vmov.32 d9[1], r6
-; BE-NEON-NEXT:    vstr d16, [sp, #56] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d9, [sp, #96] @ 8-byte Spill
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vldr s0, [sp, #120] @ 4-byte Reload
-; BE-NEON-NEXT:    mov r5, r0
-; BE-NEON-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov r7, s0
-; BE-NEON-NEXT:    vldr s0, [sp, #132] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-NEON-NEXT:    vmov r0, s0
-; BE-NEON-NEXT:    vstr d10, [sp, #120] @ 8-byte Spill
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vldr s0, [sp, #136] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov s26, r0
-; BE-NEON-NEXT:    vmov.32 d11[1], r9
-; BE-NEON-NEXT:    vmov r4, s0
-; BE-NEON-NEXT:    vldr s0, [sp, #144] @ 4-byte Reload
-; BE-NEON-NEXT:    vstr d11, [sp, #136] @ 8-byte Spill
-; BE-NEON-NEXT:    vmov r0, s0
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov.f32 s0, s26
-; BE-NEON-NEXT:    vmov s22, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s22
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    vmov s24, r5
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    mov r0, r4
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov.f32 s0, s24
-; BE-NEON-NEXT:    vmov s22, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s22
-; BE-NEON-NEXT:    mov r9, r1
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    vmov.32 d14[1], r5
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    mov r0, r7
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vldr s0, [sp, #148] @ 4-byte Reload
-; BE-NEON-NEXT:    mov r7, r0
-; BE-NEON-NEXT:    vmov.32 d13[1], r6
-; BE-NEON-NEXT:    vmov r0, s0
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vldr s0, [sp, #152] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov s20, r0
-; BE-NEON-NEXT:    vmov.32 d11[1], r5
-; BE-NEON-NEXT:    vmov r4, s0
-; BE-NEON-NEXT:    vldr s0, [sp, #156] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov r0, s0
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov.f32 s0, s20
-; BE-NEON-NEXT:    vmov s16, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    vmov s18, r7
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    mov r0, r4
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    bl __aeabi_h2f
-; BE-NEON-NEXT:    vmov.f32 s0, s18
-; BE-NEON-NEXT:    vmov s16, r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    vmov.32 d15[1], r5
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vldr d16, [sp, #160] @ 8-byte Reload
-; BE-NEON-NEXT:    vldr d20, [sp, #136] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d19, d14
-; BE-NEON-NEXT:    vrev64.32 d31, d16
-; BE-NEON-NEXT:    vldr d16, [sp, #168] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d18, d20
-; BE-NEON-NEXT:    vldr d20, [sp, #120] @ 8-byte Reload
-; BE-NEON-NEXT:    vldr d22, [sp, #96] @ 8-byte Reload
-; BE-NEON-NEXT:    vmov.32 d28[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d21, d20
-; BE-NEON-NEXT:    vrev64.32 d30, d16
-; BE-NEON-NEXT:    vldr d16, [sp, #48] @ 8-byte Reload
-; BE-NEON-NEXT:    vldr d23, [sp, #64] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d20, d22
-; BE-NEON-NEXT:    vldr d22, [sp, #112] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d1, d16
-; BE-NEON-NEXT:    vldr d16, [sp, #80] @ 8-byte Reload
-; BE-NEON-NEXT:    vmov.32 d23[1], r0
-; BE-NEON-NEXT:    add r0, r10, #64
-; BE-NEON-NEXT:    vrev64.32 d25, d22
-; BE-NEON-NEXT:    vldr d22, [sp, #104] @ 8-byte Reload
-; BE-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-NEON-NEXT:    vrev64.32 d0, d16
-; BE-NEON-NEXT:    vmov.32 d28[1], r1
-; BE-NEON-NEXT:    vldr d29, [sp, #56] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d3, d15
-; BE-NEON-NEXT:    vrev64.32 d24, d22
-; BE-NEON-NEXT:    vldr d22, [sp, #88] @ 8-byte Reload
-; BE-NEON-NEXT:    vmov.32 d10[1], r6
-; BE-NEON-NEXT:    vrev64.32 d5, d23
-; BE-NEON-NEXT:    vst1.64 {d0, d1}, [r11:128]!
-; BE-NEON-NEXT:    vrev64.32 d2, d9
-; BE-NEON-NEXT:    vrev64.32 d27, d22
-; BE-NEON-NEXT:    vmov.32 d29[1], r8
-; BE-NEON-NEXT:    vrev64.32 d4, d28
-; BE-NEON-NEXT:    vst1.64 {d30, d31}, [r11:128]
-; BE-NEON-NEXT:    vst1.64 {d2, d3}, [r0:128]!
-; BE-NEON-NEXT:    vmov.32 d12[1], r9
-; BE-NEON-NEXT:    vrev64.32 d26, d10
-; BE-NEON-NEXT:    vst1.64 {d4, d5}, [r0:128]!
-; BE-NEON-NEXT:    vrev64.32 d23, d29
-; BE-NEON-NEXT:    vst1.64 {d26, d27}, [r0:128]!
-; BE-NEON-NEXT:    vrev64.32 d22, d12
-; BE-NEON-NEXT:    vst1.64 {d24, d25}, [r0:128]
-; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r10:128]!
-; BE-NEON-NEXT:    vst1.64 {d22, d23}, [r10:128]!
-; BE-NEON-NEXT:    vrev64.32 d17, d11
-; BE-NEON-NEXT:    vrev64.32 d16, d13
-; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r10:128]!
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]
-; BE-NEON-NEXT:    add sp, sp, #176
-; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    add sp, sp, #4
-; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-  %a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x)
-  ret <32 x i64> %a
-}
-declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
-
-define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
-; LE-LABEL: llrint_v1i64_v1f32:
-; LE:       @ %bb.0:
-; LE-NEXT:    .save {r11, lr}
-; LE-NEXT:    push {r11, lr}
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.32 d0[0], r0
-; LE-NEXT:    vmov.32 d0[1], r1
-; LE-NEXT:    pop {r11, pc}
-;
-; LE-NEON-LABEL: llrint_v1i64_v1f32:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r11, lr}
-; LE-NEON-NEXT:    push {r11, lr}
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d0[0], r0
-; LE-NEON-NEXT:    vmov.32 d0[1], r1
-; LE-NEON-NEXT:    pop {r11, pc}
-;
-; BE-LABEL: llrint_v1i64_v1f32:
-; BE:       @ %bb.0:
-; BE-NEXT:    .save {r11, lr}
-; BE-NEXT:    push {r11, lr}
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    vmov.32 d16[1], r1
-; BE-NEXT:    vrev64.32 d0, d16
-; BE-NEXT:    pop {r11, pc}
-;
-; BE-NEON-LABEL: llrint_v1i64_v1f32:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r11, lr}
-; BE-NEON-NEXT:    push {r11, lr}
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-NEON-NEXT:    vrev64.32 d0, d16
-; BE-NEON-NEXT:    pop {r11, pc}
-  %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
-  ret <1 x i64> %a
-}
-declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)
-
-define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
-; LE-LABEL: llrint_v2i64_v2f32:
-; LE:       @ %bb.0:
-; LE-NEXT:    .save {r4, lr}
-; LE-NEXT:    push {r4, lr}
-; LE-NEXT:    .vsave {d10, d11}
-; LE-NEXT:    vpush {d10, d11}
-; LE-NEXT:    .vsave {d8}
-; LE-NEXT:    vpush {d8}
-; LE-NEXT:    vmov.f64 d8, d0
-; LE-NEXT:    vmov.f32 s0, s17
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s16
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    vmov.32 d11[1], r4
-; LE-NEXT:    vmov.32 d10[1], r1
-; LE-NEXT:    vorr q0, q5, q5
-; LE-NEXT:    vpop {d8}
-; LE-NEXT:    vpop {d10, d11}
-; LE-NEXT:    pop {r4, pc}
-;
-; LE-NEON-LABEL: llrint_v2i64_v2f32:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, lr}
-; LE-NEON-NEXT:    push {r4, lr}
-; LE-NEON-NEXT:    .vsave {d10, d11}
-; LE-NEON-NEXT:    vpush {d10, d11}
-; LE-NEON-NEXT:    .vsave {d8}
-; LE-NEON-NEXT:    vpush {d8}
-; LE-NEON-NEXT:    vmov.f64 d8, d0
-; LE-NEON-NEXT:    vmov.f32 s0, s17
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s16
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    vmov.32 d11[1], r4
-; LE-NEON-NEXT:    vmov.32 d10[1], r1
-; LE-NEON-NEXT:    vorr q0, q5, q5
-; LE-NEON-NEXT:    vpop {d8}
-; LE-NEON-NEXT:    vpop {d10, d11}
-; LE-NEON-NEXT:    pop {r4, pc}
-;
-; BE-LABEL: llrint_v2i64_v2f32:
-; BE:       @ %bb.0:
-; BE-NEXT:    .save {r4, lr}
-; BE-NEXT:    push {r4, lr}
-; BE-NEXT:    .vsave {d10, d11}
-; BE-NEXT:    vpush {d10, d11}
-; BE-NEXT:    .vsave {d8}
-; BE-NEXT:    vpush {d8}
-; BE-NEXT:    vrev64.32 d8, d0
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    vmov.32 d11[1], r4
-; BE-NEXT:    vmov.32 d10[1], r1
-; BE-NEXT:    vrev64.32 q0, q5
-; BE-NEXT:    vpop {d8}
-; BE-NEXT:    vpop {d10, d11}
-; BE-NEXT:    pop {r4, pc}
-;
-; BE-NEON-LABEL: llrint_v2i64_v2f32:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, lr}
-; BE-NEON-NEXT:    push {r4, lr}
-; BE-NEON-NEXT:    .vsave {d10, d11}
-; BE-NEON-NEXT:    vpush {d10, d11}
-; BE-NEON-NEXT:    .vsave {d8}
-; BE-NEON-NEXT:    vpush {d8}
-; BE-NEON-NEXT:    vrev64.32 d8, d0
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-NEON-NEXT:    vrev64.32 q0, q5
-; BE-NEON-NEXT:    vpop {d8}
-; BE-NEON-NEXT:    vpop {d10, d11}
-; BE-NEON-NEXT:    pop {r4, pc}
-  %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
-  ret <2 x i64> %a
-}
-declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)
-
-define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
-; LE-LABEL: llrint_v4i64_v4f32:
-; LE:       @ %bb.0:
-; LE-NEXT:    .save {r4, r5, r6, lr}
-; LE-NEXT:    push {r4, r5, r6, lr}
-; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; LE-NEXT:    vorr q5, q0, q0
-; LE-NEXT:    vmov.f32 s0, s23
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s20
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s21
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s22
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    vmov.32 d13[1], r6
-; LE-NEXT:    vmov.32 d9[1], r4
-; LE-NEXT:    vmov.32 d12[1], r5
-; LE-NEXT:    vmov.32 d8[1], r1
-; LE-NEXT:    vorr q0, q6, q6
-; LE-NEXT:    vorr q1, q4, q4
-; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
-; LE-NEXT:    pop {r4, r5, r6, pc}
-;
-; LE-NEON-LABEL: llrint_v4i64_v4f32:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, lr}
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; LE-NEON-NEXT:    vorr q5, q0, q0
-; LE-NEON-NEXT:    vmov.f32 s0, s23
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s20
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s21
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s22
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    vmov.32 d13[1], r6
-; LE-NEON-NEXT:    vmov.32 d9[1], r4
-; LE-NEON-NEXT:    vmov.32 d12[1], r5
-; LE-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-NEON-NEXT:    vorr q0, q6, q6
-; LE-NEON-NEXT:    vorr q1, q4, q4
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
-; LE-NEON-NEXT:    pop {r4, r5, r6, pc}
-;
-; BE-LABEL: llrint_v4i64_v4f32:
-; BE:       @ %bb.0:
-; BE-NEXT:    .save {r4, r5, r6, lr}
-; BE-NEXT:    push {r4, r5, r6, lr}
-; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; BE-NEXT:    vrev64.32 d8, d1
-; BE-NEXT:    vrev64.32 d9, d0
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s18
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s19
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    vmov.32 d13[1], r6
-; BE-NEXT:    vmov.32 d11[1], r4
-; BE-NEXT:    vmov.32 d12[1], r5
-; BE-NEXT:    vmov.32 d10[1], r1
-; BE-NEXT:    vrev64.32 q0, q6
-; BE-NEXT:    vrev64.32 q1, q5
-; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
-; BE-NEXT:    pop {r4, r5, r6, pc}
-;
-; BE-NEON-LABEL: llrint_v4i64_v4f32:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, lr}
-; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; BE-NEON-NEXT:    vrev64.32 d8, d1
-; BE-NEON-NEXT:    vrev64.32 d9, d0
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s18
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s19
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    vmov.32 d13[1], r6
-; BE-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-NEON-NEXT:    vmov.32 d12[1], r5
-; BE-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-NEON-NEXT:    vrev64.32 q0, q6
-; BE-NEON-NEXT:    vrev64.32 q1, q5
-; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
-; BE-NEON-NEXT:    pop {r4, r5, r6, pc}
-  %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
-  ret <4 x i64> %a
-}
-declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)
-
-define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
-; LE-LABEL: llrint_v8i64_v8f32:
-; LE:       @ %bb.0:
-; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    .pad #40
-; LE-NEXT:    sub sp, sp, #40
-; LE-NEXT:    vorr q6, q1, q1
-; LE-NEXT:    add lr, sp, #24
-; LE-NEXT:    vorr q7, q0, q0
-; LE-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; LE-NEXT:    vmov.f32 s0, s27
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s24
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s25
-; LE-NEXT:    mov r9, r1
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vorr q6, q7, q7
-; LE-NEXT:    add lr, sp, #8
-; LE-NEXT:    mov r10, r1
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    vmov.f32 s0, s26
-; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s27
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s24
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #8
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEXT:    vmov.f32 s0, s1
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #24
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEXT:    vmov.f32 s0, s2
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    vmov.32 d13[1], r6
-; LE-NEXT:    vmov.32 d15[1], r4
-; LE-NEXT:    vmov.32 d11[1], r10
-; LE-NEXT:    vmov.32 d9[1], r8
-; LE-NEXT:    vmov.32 d12[1], r5
-; LE-NEXT:    vmov.32 d14[1], r7
-; LE-NEXT:    vorr q0, q6, q6
-; LE-NEXT:    vmov.32 d10[1], r9
-; LE-NEXT:    vorr q1, q7, q7
-; LE-NEXT:    vmov.32 d8[1], r1
-; LE-NEXT:    vorr q2, q5, q5
-; LE-NEXT:    vorr q3, q4, q4
-; LE-NEXT:    add sp, sp, #40
-; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; LE-NEON-LABEL: llrint_v8i64_v8f32:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    .pad #40
-; LE-NEON-NEXT:    sub sp, sp, #40
-; LE-NEON-NEXT:    vorr q6, q1, q1
-; LE-NEON-NEXT:    add lr, sp, #24
-; LE-NEON-NEXT:    vorr q7, q0, q0
-; LE-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; LE-NEON-NEXT:    vmov.f32 s0, s27
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s24
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s25
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vorr q6, q7, q7
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    mov r10, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    vmov.f32 s0, s26
-; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s27
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s24
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.f32 s0, s1
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #24
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.f32 s0, s2
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    vmov.32 d13[1], r6
-; LE-NEON-NEXT:    vmov.32 d15[1], r4
-; LE-NEON-NEXT:    vmov.32 d11[1], r10
-; LE-NEON-NEXT:    vmov.32 d9[1], r8
-; LE-NEON-NEXT:    vmov.32 d12[1], r5
-; LE-NEON-NEXT:    vmov.32 d14[1], r7
-; LE-NEON-NEXT:    vorr q0, q6, q6
-; LE-NEON-NEXT:    vmov.32 d10[1], r9
-; LE-NEON-NEXT:    vorr q1, q7, q7
-; LE-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-NEON-NEXT:    vorr q2, q5, q5
-; LE-NEON-NEXT:    vorr q3, q4, q4
-; LE-NEON-NEXT:    add sp, sp, #40
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; BE-LABEL: llrint_v8i64_v8f32:
-; BE:       @ %bb.0:
-; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    .pad #32
-; BE-NEXT:    sub sp, sp, #32
-; BE-NEXT:    vorr q4, q1, q1
-; BE-NEXT:    add lr, sp, #8
-; BE-NEXT:    vorr q5, q0, q0
-; BE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-NEXT:    vrev64.32 d12, d8
-; BE-NEXT:    vmov.f32 s0, s25
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s24
-; BE-NEXT:    mov r8, r1
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vrev64.32 d0, d11
-; BE-NEXT:    mov r9, r1
-; BE-NEXT:    vrev64.32 d8, d9
-; BE-NEXT:    vorr d9, d0, d0
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    mov r10, r1
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #8
-; BE-NEXT:    vmov.f32 s0, s19
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    vrev64.32 d8, d16
-; BE-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vldr d0, [sp, #8] @ 8-byte Reload
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    vmov.f32 s0, s1
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vldr d0, [sp, #24] @ 8-byte Reload
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    @ kill: def $s0 killed $s0 killed $d0
-; BE-NEXT:    vmov.32 d9[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    vmov.32 d9[1], r6
-; BE-NEXT:    vmov.32 d11[1], r4
-; BE-NEXT:    vmov.32 d15[1], r8
-; BE-NEXT:    vmov.32 d13[1], r7
-; BE-NEXT:    vmov.32 d8[1], r5
-; BE-NEXT:    vmov.32 d10[1], r10
-; BE-NEXT:    vmov.32 d14[1], r9
-; BE-NEXT:    vmov.32 d12[1], r1
-; BE-NEXT:    vrev64.32 q0, q4
-; BE-NEXT:    vrev64.32 q1, q5
-; BE-NEXT:    vrev64.32 q2, q7
-; BE-NEXT:    vrev64.32 q3, q6
-; BE-NEXT:    add sp, sp, #32
-; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; BE-NEON-LABEL: llrint_v8i64_v8f32:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    .pad #32
-; BE-NEON-NEXT:    sub sp, sp, #32
-; BE-NEON-NEXT:    vorr q4, q1, q1
-; BE-NEON-NEXT:    add lr, sp, #8
-; BE-NEON-NEXT:    vorr q5, q0, q0
-; BE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-NEON-NEXT:    vrev64.32 d12, d8
-; BE-NEON-NEXT:    vmov.f32 s0, s25
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s24
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vrev64.32 d0, d11
-; BE-NEON-NEXT:    mov r9, r1
-; BE-NEON-NEXT:    vrev64.32 d8, d9
-; BE-NEON-NEXT:    vorr d9, d0, d0
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    mov r10, r1
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #8
-; BE-NEON-NEXT:    vmov.f32 s0, s19
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d8, d16
-; BE-NEON-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vldr d0, [sp, #8] @ 8-byte Reload
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    vmov.f32 s0, s1
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vldr d0, [sp, #24] @ 8-byte Reload
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    @ kill: def $s0 killed $s0 killed $d0
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    vmov.32 d9[1], r6
-; BE-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-NEON-NEXT:    vmov.32 d15[1], r8
-; BE-NEON-NEXT:    vmov.32 d13[1], r7
-; BE-NEON-NEXT:    vmov.32 d8[1], r5
-; BE-NEON-NEXT:    vmov.32 d10[1], r10
-; BE-NEON-NEXT:    vmov.32 d14[1], r9
-; BE-NEON-NEXT:    vmov.32 d12[1], r1
-; BE-NEON-NEXT:    vrev64.32 q0, q4
-; BE-NEON-NEXT:    vrev64.32 q1, q5
-; BE-NEON-NEXT:    vrev64.32 q2, q7
-; BE-NEON-NEXT:    vrev64.32 q3, q6
-; BE-NEON-NEXT:    add sp, sp, #32
-; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-  %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x)
-  ret <8 x i64> %a
-}
-declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
-
-define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
-; LE-LABEL: llrint_v16i64_v16f32:
-; LE:       @ %bb.0:
-; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEXT:    .pad #4
-; LE-NEXT:    sub sp, sp, #4
-; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    .pad #160
-; LE-NEXT:    sub sp, sp, #160
-; LE-NEXT:    add lr, sp, #112
-; LE-NEXT:    vorr q5, q3, q3
-; LE-NEXT:    vorr q6, q0, q0
-; LE-NEXT:    mov r4, r0
-; LE-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #48
-; LE-NEXT:    vorr q7, q1, q1
-; LE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-NEXT:    vmov.f32 s0, s23
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s24
-; LE-NEXT:    add lr, sp, #144
-; LE-NEXT:    vmov.32 d17[0], r0
-; LE-NEXT:    str r1, [sp, #108] @ 4-byte Spill
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s25
-; LE-NEXT:    str r1, [sp, #84] @ 4-byte Spill
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s28
-; LE-NEXT:    add lr, sp, #128
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    str r1, [sp, #44] @ 4-byte Spill
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s29
-; LE-NEXT:    mov r9, r1
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s30
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s31
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #112
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-NEXT:    vmov.f32 s0, s29
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s22
-; LE-NEXT:    add lr, sp, #24
-; LE-NEXT:    vmov.32 d17[0], r0
-; LE-NEXT:    mov r11, r1
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    vmov.32 d13[1], r7
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #144
-; LE-NEXT:    vmov.f32 s0, s21
-; LE-NEXT:    vmov.32 d12[1], r5
-; LE-NEXT:    str r1, [sp, #40] @ 4-byte Spill
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d16[0], r0
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #88
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s20
-; LE-NEXT:    mov r10, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    vmov.32 d9[1], r6
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s31
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    add lr, sp, #8
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vmov.32 d8[1], r9
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #64
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #128
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
-; LE-NEXT:    mov r9, r1
-; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #48
-; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEXT:    vmov.f32 s0, s27
-; LE-NEXT:    vmov.32 d11[1], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s26
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
-; LE-NEXT:    add lr, sp, #128
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d10[1], r0
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    add lr, sp, #144
-; LE-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d17[1], r0
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #112
-; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEXT:    vmov.f32 s0, s20
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s1
 ; LE-NEXT:    bl llrintf
 ; LE-NEXT:    add lr, sp, #24
-; LE-NEXT:    vmov.f32 s0, s22
 ; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d16[0], r0
-; LE-NEXT:    vmov.32 d17[1], r11
-; LE-NEXT:    vorr q6, q8, q8
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s2
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #144
 ; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #8
-; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #128
-; LE-NEXT:    vmov.32 d9[1], r9
-; LE-NEXT:    vmov.32 d12[1], r6
-; LE-NEXT:    vmov.32 d19[1], r10
+; LE-NEXT:    vmov.32 d13[1], r6
+; LE-NEXT:    vmov.32 d15[1], r4
+; LE-NEXT:    vmov.32 d11[1], r10
+; LE-NEXT:    vmov.32 d9[1], r8
+; LE-NEXT:    vmov.32 d12[1], r5
+; LE-NEXT:    vmov.32 d14[1], r7
+; LE-NEXT:    vorr q0, q6, q6
+; LE-NEXT:    vmov.32 d10[1], r9
+; LE-NEXT:    vorr q1, q7, q7
 ; LE-NEXT:    vmov.32 d8[1], r1
-; LE-NEXT:    vmov.32 d16[1], r0
-; LE-NEXT:    add r0, r4, #64
-; LE-NEXT:    vmov.32 d18[1], r8
-; LE-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-NEXT:    vst1.64 {d8, d9}, [r0:128]!
-; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-NEXT:    vmov.32 d15[1], r7
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #64
-; LE-NEXT:    vmov.32 d14[1], r5
-; LE-NEXT:    vst1.64 {d16, d17}, [r4:128]!
-; LE-NEXT:    vst1.64 {d14, d15}, [r4:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #88
-; LE-NEXT:    vst1.64 {d16, d17}, [r4:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vst1.64 {d16, d17}, [r4:128]
-; LE-NEXT:    add sp, sp, #160
+; LE-NEXT:    vorr q2, q5, q5
+; LE-NEXT:    vorr q3, q4, q4
+; LE-NEXT:    add sp, sp, #40
 ; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    add sp, sp, #4
-; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; LE-NEON-LABEL: llrint_v16i64_v16f32:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    .pad #4
-; LE-NEON-NEXT:    sub sp, sp, #4
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    .pad #160
-; LE-NEON-NEXT:    sub sp, sp, #160
-; LE-NEON-NEXT:    add lr, sp, #112
-; LE-NEON-NEXT:    vorr q5, q3, q3
-; LE-NEON-NEXT:    vorr q6, q0, q0
-; LE-NEON-NEXT:    mov r4, r0
-; LE-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #48
-; LE-NEON-NEXT:    vorr q7, q1, q1
-; LE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-NEON-NEXT:    vmov.f32 s0, s23
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s24
-; LE-NEON-NEXT:    add lr, sp, #144
-; LE-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-NEON-NEXT:    str r1, [sp, #108] @ 4-byte Spill
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s25
-; LE-NEON-NEXT:    str r1, [sp, #84] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s28
-; LE-NEON-NEXT:    add lr, sp, #128
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s29
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s30
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s31
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #112
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.f32 s0, s29
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s22
-; LE-NEON-NEXT:    add lr, sp, #24
-; LE-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-NEON-NEXT:    mov r11, r1
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    vmov.32 d13[1], r7
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #144
-; LE-NEON-NEXT:    vmov.f32 s0, s21
-; LE-NEON-NEXT:    vmov.32 d12[1], r5
-; LE-NEON-NEXT:    str r1, [sp, #40] @ 4-byte Spill
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d16[0], r0
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #88
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s20
-; LE-NEON-NEXT:    mov r10, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    vmov.32 d9[1], r6
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s31
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vmov.32 d8[1], r9
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #64
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #128
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #48
-; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.f32 s0, s27
-; LE-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s26
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #128
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d10[1], r0
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    add lr, sp, #144
-; LE-NEON-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d17[1], r0
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #112
-; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.f32 s0, s20
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #24
-; LE-NEON-NEXT:    vmov.f32 s0, s22
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d16[0], r0
-; LE-NEON-NEXT:    vmov.32 d17[1], r11
-; LE-NEON-NEXT:    vorr q6, q8, q8
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #144
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #128
-; LE-NEON-NEXT:    vmov.32 d9[1], r9
-; LE-NEON-NEXT:    vmov.32 d12[1], r6
-; LE-NEON-NEXT:    vmov.32 d19[1], r10
-; LE-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-NEON-NEXT:    vmov.32 d16[1], r0
-; LE-NEON-NEXT:    add r0, r4, #64
-; LE-NEON-NEXT:    vmov.32 d18[1], r8
-; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-NEON-NEXT:    vmov.32 d15[1], r7
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #64
-; LE-NEON-NEXT:    vmov.32 d14[1], r5
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
-; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r4:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #88
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]
-; LE-NEON-NEXT:    add sp, sp, #160
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    add sp, sp, #4
-; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 ;
-; BE-LABEL: llrint_v16i64_v16f32:
+; BE-LABEL: llrint_v8i64_v8f32:
 ; BE:       @ %bb.0:
-; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEXT:    .pad #4
-; BE-NEXT:    sub sp, sp, #4
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
 ; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    .pad #144
-; BE-NEXT:    sub sp, sp, #144
-; BE-NEXT:    vorr q6, q3, q3
-; BE-NEXT:    add lr, sp, #112
-; BE-NEXT:    vorr q7, q0, q0
-; BE-NEXT:    mov r4, r0
-; BE-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #96
-; BE-NEXT:    vrev64.32 d8, d13
-; BE-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    str r1, [sp, #88] @ 4-byte Spill
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vrev64.32 d8, d14
-; BE-NEXT:    add lr, sp, #128
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    str r1, [sp, #92] @ 4-byte Spill
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    vrev64.32 d9, d12
-; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEXT:    vstr d9, [sp, #64] @ 8-byte Spill
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s19
-; BE-NEXT:    mov r9, r1
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    str r1, [sp, #84] @ 4-byte Spill
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    vrev64.32 d9, d15
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s18
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s19
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vldr d0, [sp, #64] @ 8-byte Reload
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    @ kill: def $s0 killed $s0 killed $d0
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    add lr, sp, #40
-; BE-NEXT:    str r1, [sp, #60] @ 4-byte Spill
-; BE-NEXT:    vmov.32 d15[1], r7
-; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #96
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    vrev64.32 d8, d16
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    vmov.32 d14[1], r5
-; BE-NEXT:    add lr, sp, #64
-; BE-NEXT:    mov r10, r1
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    add lr, sp, #24
-; BE-NEXT:    mov r11, r1
-; BE-NEXT:    vmov.32 d13[1], r6
-; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #96
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    vrev64.32 d8, d17
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    vmov.32 d12[1], r9
-; BE-NEXT:    add lr, sp, #96
-; BE-NEXT:    mov r8, r1
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    .pad #32
+; BE-NEXT:    sub sp, sp, #32
+; BE-NEXT:    vorr q4, q1, q1
 ; BE-NEXT:    add lr, sp, #8
-; BE-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
-; BE-NEXT:    mov r9, r1
-; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #112
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #128
-; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEXT:    vrev64.32 d8, d16
-; BE-NEXT:    vmov.32 d11[1], r0
-; BE-NEXT:    vmov.f32 s0, s17
+; BE-NEXT:    vorr q5, q0, q0
+; BE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-NEXT:    vrev64.32 d12, d8
+; BE-NEXT:    vmov.f32 s0, s25
 ; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    vmov.f32 s0, s24
+; BE-NEXT:    mov r8, r1
 ; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    ldr r0, [sp, #92] @ 4-byte Reload
-; BE-NEXT:    add lr, sp, #128
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d10[1], r0
-; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
 ; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #112
+; BE-NEXT:    vrev64.32 d0, d11
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vrev64.32 d8, d9
+; BE-NEXT:    vorr d9, d0, d0
 ; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #40
-; BE-NEXT:    vrev64.32 d8, d17
-; BE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
+; BE-NEXT:    bl llrintf
 ; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    vmov.32 d13[1], r0
+; BE-NEXT:    mov r10, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vmov.f32 s0, s19
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 d8, d16
+; BE-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    vmov.f32 s0, s16
+; BE-NEXT:    mov r4, r1
 ; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    ldr r0, [sp, #60] @ 4-byte Reload
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vldr d0, [sp, #8] @ 8-byte Reload
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    vmov.f32 s0, s1
+; BE-NEXT:    bl llrintf
+; BE-NEXT:    vldr d0, [sp, #24] @ 8-byte Reload
 ; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vmov.32 d12[1], r0
+; BE-NEXT:    @ kill: def $s0 killed $s0 killed $d0
+; BE-NEXT:    vmov.32 d9[0], r0
 ; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #24
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    add r0, r4, #64
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #8
-; BE-NEXT:    vmov.32 d17[1], r10
-; BE-NEXT:    vmov.32 d16[1], r11
-; BE-NEXT:    vorr q12, q8, q8
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #128
-; BE-NEXT:    vmov.32 d15[1], r7
-; BE-NEXT:    vmov.32 d11[1], r6
-; BE-NEXT:    vmov.32 d14[1], r5
-; BE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #96
-; BE-NEXT:    vmov.32 d10[1], r1
-; BE-NEXT:    vmov.32 d17[1], r8
-; BE-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #64
-; BE-NEXT:    vmov.32 d16[1], r9
-; BE-NEXT:    vrev64.32 q14, q7
-; BE-NEXT:    vorr q13, q8, q8
-; BE-NEXT:    vrev64.32 q15, q5
-; BE-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
-; BE-NEXT:    vrev64.32 q8, q6
-; BE-NEXT:    vst1.64 {d28, d29}, [r0:128]!
-; BE-NEXT:    vst1.64 {d30, d31}, [r0:128]!
-; BE-NEXT:    vrev64.32 q9, q9
-; BE-NEXT:    vrev64.32 q10, q10
-; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEXT:    vrev64.32 q11, q11
-; BE-NEXT:    vrev64.32 q12, q12
-; BE-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; BE-NEXT:    vst1.64 {d20, d21}, [r4:128]!
-; BE-NEXT:    vst1.64 {d22, d23}, [r4:128]!
-; BE-NEXT:    vrev64.32 q13, q13
-; BE-NEXT:    vst1.64 {d24, d25}, [r4:128]!
-; BE-NEXT:    vst1.64 {d26, d27}, [r4:128]
-; BE-NEXT:    add sp, sp, #144
+; BE-NEXT:    vmov.32 d12[0], r0
+; BE-NEXT:    vmov.32 d9[1], r6
+; BE-NEXT:    vmov.32 d11[1], r4
+; BE-NEXT:    vmov.32 d15[1], r8
+; BE-NEXT:    vmov.32 d13[1], r7
+; BE-NEXT:    vmov.32 d8[1], r5
+; BE-NEXT:    vmov.32 d10[1], r10
+; BE-NEXT:    vmov.32 d14[1], r9
+; BE-NEXT:    vmov.32 d12[1], r1
+; BE-NEXT:    vrev64.32 q0, q4
+; BE-NEXT:    vrev64.32 q1, q5
+; BE-NEXT:    vrev64.32 q2, q7
+; BE-NEXT:    vrev64.32 q3, q6
+; BE-NEXT:    add sp, sp, #32
 ; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    add sp, sp, #4
-; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-NEON-LABEL: llrint_v16i64_v16f32:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    .pad #4
-; BE-NEON-NEXT:    sub sp, sp, #4
-; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    .pad #144
-; BE-NEON-NEXT:    sub sp, sp, #144
-; BE-NEON-NEXT:    vorr q6, q3, q3
-; BE-NEON-NEXT:    add lr, sp, #112
-; BE-NEON-NEXT:    vorr q7, q0, q0
-; BE-NEON-NEXT:    mov r4, r0
-; BE-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #96
-; BE-NEON-NEXT:    vrev64.32 d8, d13
-; BE-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    str r1, [sp, #88] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vrev64.32 d8, d14
-; BE-NEON-NEXT:    add lr, sp, #128
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    str r1, [sp, #92] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    vrev64.32 d9, d12
-; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEON-NEXT:    vstr d9, [sp, #64] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s19
-; BE-NEON-NEXT:    mov r9, r1
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    str r1, [sp, #84] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    vrev64.32 d9, d15
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s18
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s19
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vldr d0, [sp, #64] @ 8-byte Reload
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    @ kill: def $s0 killed $s0 killed $d0
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    add lr, sp, #40
-; BE-NEON-NEXT:    str r1, [sp, #60] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.32 d15[1], r7
-; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #96
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d8, d16
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    vmov.32 d14[1], r5
-; BE-NEON-NEXT:    add lr, sp, #64
-; BE-NEON-NEXT:    mov r10, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    add lr, sp, #24
-; BE-NEON-NEXT:    mov r11, r1
-; BE-NEON-NEXT:    vmov.32 d13[1], r6
-; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #96
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d8, d17
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    vmov.32 d12[1], r9
-; BE-NEON-NEXT:    add lr, sp, #96
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    add lr, sp, #8
-; BE-NEON-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
-; BE-NEON-NEXT:    mov r9, r1
-; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #112
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #128
-; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d8, d16
-; BE-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #92] @ 4-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #128
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #112
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #40
-; BE-NEON-NEXT:    vrev64.32 d8, d17
-; BE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    vmov.32 d13[1], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #60] @ 4-byte Reload
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d12[1], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #24
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    add r0, r4, #64
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #8
-; BE-NEON-NEXT:    vmov.32 d17[1], r10
-; BE-NEON-NEXT:    vmov.32 d16[1], r11
-; BE-NEON-NEXT:    vorr q12, q8, q8
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #128
-; BE-NEON-NEXT:    vmov.32 d15[1], r7
-; BE-NEON-NEXT:    vmov.32 d11[1], r6
-; BE-NEON-NEXT:    vmov.32 d14[1], r5
-; BE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #96
-; BE-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-NEON-NEXT:    vmov.32 d17[1], r8
-; BE-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #64
-; BE-NEON-NEXT:    vmov.32 d16[1], r9
-; BE-NEON-NEXT:    vrev64.32 q14, q7
-; BE-NEON-NEXT:    vorr q13, q8, q8
-; BE-NEON-NEXT:    vrev64.32 q15, q5
-; BE-NEON-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
-; BE-NEON-NEXT:    vrev64.32 q8, q6
-; BE-NEON-NEXT:    vst1.64 {d28, d29}, [r0:128]!
-; BE-NEON-NEXT:    vst1.64 {d30, d31}, [r0:128]!
-; BE-NEON-NEXT:    vrev64.32 q9, q9
-; BE-NEON-NEXT:    vrev64.32 q10, q10
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEON-NEXT:    vrev64.32 q11, q11
-; BE-NEON-NEXT:    vrev64.32 q12, q12
-; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r4:128]!
-; BE-NEON-NEXT:    vst1.64 {d22, d23}, [r4:128]!
-; BE-NEON-NEXT:    vrev64.32 q13, q13
-; BE-NEON-NEXT:    vst1.64 {d24, d25}, [r4:128]!
-; BE-NEON-NEXT:    vst1.64 {d26, d27}, [r4:128]
-; BE-NEON-NEXT:    add sp, sp, #144
-; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    add sp, sp, #4
-; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-  %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
-  ret <16 x i64> %a
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+  %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x)
+  ret <8 x i64> %a
 }
-declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
+declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
 
-define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
-; LE-LABEL: llrint_v32i64_v32f32:
+define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
+; LE-LABEL: llrint_v16i64_v16f32:
 ; LE:       @ %bb.0:
 ; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -4395,625 +1081,155 @@ define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
 ; LE-NEXT:    sub sp, sp, #4
 ; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    .pad #184
-; LE-NEXT:    sub sp, sp, #184
-; LE-NEXT:    add lr, sp, #152
-; LE-NEXT:    vorr q7, q3, q3
-; LE-NEXT:    vorr q4, q2, q2
-; LE-NEXT:    mov r5, r0
-; LE-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #88
+; LE-NEXT:    .pad #160
+; LE-NEXT:    sub sp, sp, #160
+; LE-NEXT:    add lr, sp, #112
+; LE-NEXT:    vorr q5, q3, q3
+; LE-NEXT:    vorr q6, q0, q0
+; LE-NEXT:    mov r4, r0
+; LE-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #48
+; LE-NEXT:    vorr q7, q1, q1
 ; LE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-NEXT:    vmov.f32 s0, s3
-; LE-NEXT:    str r0, [sp, #68] @ 4-byte Spill
+; LE-NEXT:    vmov.f32 s0, s23
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s18
-; LE-NEXT:    add lr, sp, #168
+; LE-NEXT:    vmov.f32 s0, s24
+; LE-NEXT:    add lr, sp, #144
 ; LE-NEXT:    vmov.32 d17[0], r0
-; LE-NEXT:    str r1, [sp, #16] @ 4-byte Spill
+; LE-NEXT:    str r1, [sp, #108] @ 4-byte Spill
 ; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s16
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s17
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s19
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s31
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s30
-; LE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    vmov.32 d11[1], r7
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s29
+; LE-NEXT:    vmov.f32 s0, s25
+; LE-NEXT:    str r1, [sp, #84] @ 4-byte Spill
 ; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    add lr, sp, #104
-; LE-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; LE-NEXT:    vmov.32 d13[1], r4
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    add r0, sp, #320
-; LE-NEXT:    add lr, sp, #120
-; LE-NEXT:    mov r11, r1
-; LE-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-NEXT:    add r0, sp, #304
-; LE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #72
-; LE-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-NEXT:    add r0, sp, #336
-; LE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #32
-; LE-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-NEXT:    add r0, sp, #288
-; LE-NEXT:    vmov.32 d12[1], r6
-; LE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #48
-; LE-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-NEXT:    vmov.32 d10[1], r8
-; LE-NEXT:    add r8, r5, #64
-; LE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #152
-; LE-NEXT:    vst1.64 {d12, d13}, [r8:128]!
-; LE-NEXT:    vst1.64 {d10, d11}, [r8:128]!
-; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEXT:    vmov.f32 s0, s27
 ; LE-NEXT:    bl llrintf
 ; LE-NEXT:    vmov.f32 s0, s28
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    add lr, sp, #128
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s26
+; LE-NEXT:    vmov.f32 s0, s29
 ; LE-NEXT:    mov r9, r1
 ; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    vmov.32 d11[1], r4
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    add lr, sp, #136
-; LE-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
-; LE-NEXT:    mov r10, r1
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #168
-; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #88
-; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEXT:    vmov.f32 s0, s26
-; LE-NEXT:    vmov.32 d11[1], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s25
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    add lr, sp, #168
-; LE-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #104
-; LE-NEXT:    vorr q5, q6, q6
-; LE-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d15[1], r0
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s20
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; LE-NEXT:    add lr, sp, #104
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d14[1], r0
-; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    add lr, sp, #152
+; LE-NEXT:    vmov.f32 s0, s30
 ; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vorr q7, q6, q6
-; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d9[1], r11
-; LE-NEXT:    vmov.f32 s0, s25
+; LE-NEXT:    vmov.32 d9[0], r0
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s24
+; LE-NEXT:    vmov.f32 s0, s31
 ; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    vmov.32 d8[1], r9
+; LE-NEXT:    vmov.32 d12[0], r0
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #136
-; LE-NEXT:    mov r11, r1
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d16[1], r10
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #120
-; LE-NEXT:    vst1.64 {d8, d9}, [r8:128]!
-; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEXT:    vmov.f32 s0, s1
+; LE-NEXT:    add lr, sp, #112
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s29
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #152
+; LE-NEXT:    vmov.f32 s0, s22
+; LE-NEXT:    add lr, sp, #24
 ; LE-NEXT:    vmov.32 d17[0], r0
-; LE-NEXT:    mov r10, r1
+; LE-NEXT:    mov r11, r1
 ; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #104
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #72
-; LE-NEXT:    vst1.64 {d16, d17}, [r8:128]
-; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEXT:    vmov.f32 s0, s19
+; LE-NEXT:    vmov.32 d13[1], r7
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #168
-; LE-NEXT:    vmov.f32 s0, s18
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    vmov.f32 s0, s21
+; LE-NEXT:    vmov.32 d12[1], r5
+; LE-NEXT:    str r1, [sp, #40] @ 4-byte Spill
 ; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d16[1], r7
+; LE-NEXT:    vmov.32 d16[0], r0
 ; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s17
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    add lr, sp, #104
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d15[1], r4
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s16
-; LE-NEXT:    mov r9, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    vmov.32 d14[1], r6
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.32 d12[0], r0
 ; LE-NEXT:    add lr, sp, #88
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vmov.32 d11[1], r5
-; LE-NEXT:    vmov.32 d10[1], r11
-; LE-NEXT:    ldr r11, [sp, #68] @ 4-byte Reload
 ; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #16
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #32
-; LE-NEXT:    vst1.64 {d14, d15}, [r11:128]!
-; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEXT:    vmov.f32 s0, s23
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #152
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #120
-; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEXT:    @ kill: def $s0 killed $s0 killed $q0
-; LE-NEXT:    vmov.32 d13[1], r10
+; LE-NEXT:    vmov.f32 s0, s20
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    vmov.32 d9[1], r6
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s22
+; LE-NEXT:    vmov.f32 s0, s31
 ; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    add lr, sp, #152
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #104
-; LE-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d15[1], r8
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s21
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    add lr, sp, #72
+; LE-NEXT:    add lr, sp, #8
 ; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vmov.32 d14[1], r7
+; LE-NEXT:    vmov.32 d8[1], r9
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #64
 ; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #104
-; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s20
-; LE-NEXT:    add lr, sp, #88
-; LE-NEXT:    mov r7, r1
+; LE-NEXT:    add lr, sp, #128
 ; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d13[1], r9
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    add lr, sp, #32
+; LE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
 ; LE-NEXT:    mov r9, r1
-; LE-NEXT:    vmov.32 d12[1], r6
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #88
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #120
-; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEXT:    vmov.f32 s0, s19
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #48
+; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT:    vmov.f32 s0, s27
+; LE-NEXT:    vmov.32 d11[1], r0
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s18
-; LE-NEXT:    add lr, sp, #72
-; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov.f32 s0, s26
 ; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d13[1], r4
+; LE-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; LE-NEXT:    add lr, sp, #128
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d10[1], r0
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #152
-; LE-NEXT:    mov r4, r1
 ; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; LE-NEXT:    mov r5, r1
 ; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d16[1], r5
+; LE-NEXT:    vmov.32 d17[1], r0
 ; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #168
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #48
-; LE-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-NEXT:    add lr, sp, #112
 ; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEXT:    vmov.f32 s0, s21
-; LE-NEXT:    bl llrintf
 ; LE-NEXT:    vmov.f32 s0, s20
-; LE-NEXT:    vmov.32 d12[1], r8
-; LE-NEXT:    add lr, sp, #72
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
 ; LE-NEXT:    bl llrintf
-; LE-NEXT:    vmov.f32 s0, s23
-; LE-NEXT:    add lr, sp, #32
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    vmov.f32 s0, s22
 ; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d13[1], r7
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #48
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEXT:    vmov.f32 s0, s2
-; LE-NEXT:    vmov.32 d12[1], r9
-; LE-NEXT:    bl llrintf
-; LE-NEXT:    add lr, sp, #16
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #136
-; LE-NEXT:    vmov.32 d11[1], r7
-; LE-NEXT:    vst1.64 {d16, d17}, [r11:128]!
 ; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #152
-; LE-NEXT:    vmov.32 d15[1], r10
-; LE-NEXT:    vst1.64 {d16, d17}, [r11:128]
-; LE-NEXT:    vmov.32 d10[1], r1
-; LE-NEXT:    ldr r1, [sp, #68] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d16[0], r0
+; LE-NEXT:    vmov.32 d17[1], r11
+; LE-NEXT:    vorr q6, q8, q8
+; LE-NEXT:    bl llrintf
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
 ; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add r0, r1, #192
-; LE-NEXT:    add lr, sp, #72
-; LE-NEXT:    vmov.32 d14[1], r4
-; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #128
+; LE-NEXT:    vmov.32 d9[1], r9
+; LE-NEXT:    vmov.32 d12[1], r6
+; LE-NEXT:    vmov.32 d19[1], r10
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vmov.32 d16[1], r0
+; LE-NEXT:    add r0, r4, #64
+; LE-NEXT:    vmov.32 d18[1], r8
 ; LE-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-NEXT:    vmov.32 d9[1], r5
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #88
-; LE-NEXT:    vmov.32 d8[1], r6
-; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-NEXT:    add r0, r1, #128
 ; LE-NEXT:    vst1.64 {d8, d9}, [r0:128]!
-; LE-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT:    vmov.32 d15[1], r7
 ; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #104
-; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT:    add lr, sp, #64
+; LE-NEXT:    vmov.32 d14[1], r5
+; LE-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-NEXT:    vst1.64 {d14, d15}, [r4:128]!
 ; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-NEXT:    add sp, sp, #184
+; LE-NEXT:    add lr, sp, #88
+; LE-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vst1.64 {d16, d17}, [r4:128]
+; LE-NEXT:    add sp, sp, #160
 ; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-NEXT:    add sp, sp, #4
 ; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
-; LE-NEON-LABEL: llrint_v32i64_v32f32:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    .pad #4
-; LE-NEON-NEXT:    sub sp, sp, #4
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    .pad #184
-; LE-NEON-NEXT:    sub sp, sp, #184
-; LE-NEON-NEXT:    add lr, sp, #152
-; LE-NEON-NEXT:    vorr q7, q3, q3
-; LE-NEON-NEXT:    vorr q4, q2, q2
-; LE-NEON-NEXT:    mov r5, r0
-; LE-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #88
-; LE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-NEON-NEXT:    vmov.f32 s0, s3
-; LE-NEON-NEXT:    str r0, [sp, #68] @ 4-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s18
-; LE-NEON-NEXT:    add lr, sp, #168
-; LE-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-NEON-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s16
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s17
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s19
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s31
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s30
-; LE-NEON-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    vmov.32 d11[1], r7
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s29
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    add lr, sp, #104
-; LE-NEON-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.32 d13[1], r4
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    add r0, sp, #320
-; LE-NEON-NEXT:    add lr, sp, #120
-; LE-NEON-NEXT:    mov r11, r1
-; LE-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-NEON-NEXT:    add r0, sp, #304
-; LE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #72
-; LE-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-NEON-NEXT:    add r0, sp, #336
-; LE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #32
-; LE-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-NEON-NEXT:    add r0, sp, #288
-; LE-NEON-NEXT:    vmov.32 d12[1], r6
-; LE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #48
-; LE-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-NEON-NEXT:    vmov.32 d10[1], r8
-; LE-NEON-NEXT:    add r8, r5, #64
-; LE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #152
-; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r8:128]!
-; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r8:128]!
-; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.f32 s0, s27
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s28
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s26
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    vmov.32 d11[1], r4
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    add lr, sp, #136
-; LE-NEON-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r10, r1
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #168
-; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #88
-; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.f32 s0, s26
-; LE-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s25
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    add lr, sp, #168
-; LE-NEON-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #104
-; LE-NEON-NEXT:    vorr q5, q6, q6
-; LE-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d15[1], r0
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s20
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #104
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d14[1], r0
-; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    add lr, sp, #152
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vorr q7, q6, q6
-; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d9[1], r11
-; LE-NEON-NEXT:    vmov.f32 s0, s25
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s24
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    vmov.32 d8[1], r9
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #136
-; LE-NEON-NEXT:    mov r11, r1
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d16[1], r10
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #120
-; LE-NEON-NEXT:    vst1.64 {d8, d9}, [r8:128]!
-; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.f32 s0, s1
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #152
-; LE-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-NEON-NEXT:    mov r10, r1
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #104
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #72
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.f32 s0, s19
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #168
-; LE-NEON-NEXT:    vmov.f32 s0, s18
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d16[1], r7
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s17
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    add lr, sp, #104
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d15[1], r4
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s16
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    vmov.32 d14[1], r6
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    add lr, sp, #88
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d11[1], r5
-; LE-NEON-NEXT:    vmov.32 d10[1], r11
-; LE-NEON-NEXT:    ldr r11, [sp, #68] @ 4-byte Reload
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #16
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #32
-; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r11:128]!
-; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.f32 s0, s23
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #152
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #120
-; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEON-NEXT:    @ kill: def $s0 killed $s0 killed $q0
-; LE-NEON-NEXT:    vmov.32 d13[1], r10
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s22
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    add lr, sp, #152
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #104
-; LE-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d15[1], r8
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s21
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    add lr, sp, #72
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vmov.32 d14[1], r7
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #104
-; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s20
-; LE-NEON-NEXT:    add lr, sp, #88
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d13[1], r9
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    add lr, sp, #32
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d12[1], r6
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #88
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #120
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.f32 s0, s19
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s18
-; LE-NEON-NEXT:    add lr, sp, #72
-; LE-NEON-NEXT:    mov r10, r1
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d13[1], r4
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #152
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d16[1], r5
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #168
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #48
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
-; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.f32 s0, s21
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s20
-; LE-NEON-NEXT:    vmov.32 d12[1], r8
-; LE-NEON-NEXT:    add lr, sp, #72
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    vmov.f32 s0, s23
-; LE-NEON-NEXT:    add lr, sp, #32
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d13[1], r7
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #48
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.f32 s0, s2
-; LE-NEON-NEXT:    vmov.32 d12[1], r9
-; LE-NEON-NEXT:    bl llrintf
-; LE-NEON-NEXT:    add lr, sp, #16
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #136
-; LE-NEON-NEXT:    vmov.32 d11[1], r7
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #152
-; LE-NEON-NEXT:    vmov.32 d15[1], r10
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]
-; LE-NEON-NEXT:    vmov.32 d10[1], r1
-; LE-NEON-NEXT:    ldr r1, [sp, #68] @ 4-byte Reload
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add r0, r1, #192
-; LE-NEON-NEXT:    add lr, sp, #72
-; LE-NEON-NEXT:    vmov.32 d14[1], r4
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-NEON-NEXT:    vmov.32 d9[1], r5
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #88
-; LE-NEON-NEXT:    vmov.32 d8[1], r6
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-NEON-NEXT:    add r0, r1, #128
-; LE-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #104
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-NEON-NEXT:    add sp, sp, #184
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    add sp, sp, #4
-; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-LABEL: llrint_v32i64_v32f32:
+; BE-LABEL: llrint_v16i64_v16f32:
 ; BE:       @ %bb.0:
 ; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -5021,655 +1237,171 @@ define <32 x i64> @llrint_v32i64_v32f32(<32 x float> %x) {
 ; BE-NEXT:    sub sp, sp, #4
 ; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    .pad #256
-; BE-NEXT:    sub sp, sp, #256
-; BE-NEXT:    add lr, sp, #208
-; BE-NEXT:    str r0, [sp, #156] @ 4-byte Spill
-; BE-NEXT:    add r0, sp, #408
-; BE-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #120
-; BE-NEXT:    vld1.64 {d10, d11}, [r0]
-; BE-NEXT:    add r0, sp, #392
+; BE-NEXT:    .pad #144
+; BE-NEXT:    sub sp, sp, #144
+; BE-NEXT:    vorr q6, q3, q3
+; BE-NEXT:    add lr, sp, #112
+; BE-NEXT:    vorr q7, q0, q0
+; BE-NEXT:    mov r4, r0
 ; BE-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #160
+; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    vrev64.32 d8, d13
 ; BE-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #176
-; BE-NEXT:    vrev64.32 d8, d10
-; BE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #136
 ; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    vld1.64 {d12, d13}, [r0]
-; BE-NEXT:    add r0, sp, #360
-; BE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #192
-; BE-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEXT:    add r0, sp, #376
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #40
-; BE-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    vmov.f32 s0, s16
 ; BE-NEXT:    str r1, [sp, #88] @ 4-byte Spill
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vrev64.32 d9, d11
-; BE-NEXT:    add lr, sp, #240
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    str r1, [sp, #104] @ 4-byte Spill
-; BE-NEXT:    vmov.f32 s0, s18
-; BE-NEXT:    vrev64.32 d8, d13
-; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s19
-; BE-NEXT:    add lr, sp, #192
-; BE-NEXT:    str r1, [sp, #72] @ 4-byte Spill
-; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    vrev64.32 d10, d16
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s20
-; BE-NEXT:    add lr, sp, #224
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s21
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    add lr, sp, #24
-; BE-NEXT:    mov r9, r1
-; BE-NEXT:    vmov.32 d15[1], r6
-; BE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #192
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    vrev64.32 d8, d17
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    vmov.32 d14[1], r7
-; BE-NEXT:    add lr, sp, #56
-; BE-NEXT:    mov r10, r1
 ; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
 ; BE-NEXT:    bl llrintf
+; BE-NEXT:    vrev64.32 d8, d14
+; BE-NEXT:    add lr, sp, #128
 ; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    add lr, sp, #192
-; BE-NEXT:    mov r11, r1
-; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #40
-; BE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #224
-; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEXT:    vrev64.32 d8, d12
-; BE-NEXT:    vmov.32 d11[1], r4
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    bl llrintf
+; BE-NEXT:    str r1, [sp, #92] @ 4-byte Spill
 ; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    vmov.32 d10[1], r5
-; BE-NEXT:    add lr, sp, #224
-; BE-NEXT:    mov r8, r1
-; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    vrev64.32 d9, d12
 ; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    vstr d9, [sp, #64] @ 8-byte Spill
 ; BE-NEXT:    bl llrintf
-; BE-NEXT:    vrev64.32 d8, d13
-; BE-NEXT:    add lr, sp, #8
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #240
-; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEXT:    vmov.32 d11[1], r0
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
-; BE-NEXT:    add lr, sp, #240
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d10[1], r0
-; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    vmov.f32 s0, s19
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vmov.32 d12[0], r0
 ; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #136
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #24
-; BE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-NEXT:    vrev64.32 d8, d16
-; BE-NEXT:    vmov.32 d13[1], r0
 ; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    mov r7, r1
+; BE-NEXT:    str r1, [sp, #84] @ 4-byte Spill
 ; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    vmov.32 d12[1], r9
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #192
-; BE-NEXT:    vmov.32 d15[1], r4
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #8
-; BE-NEXT:    vmov.32 d17[1], r10
-; BE-NEXT:    vmov.32 d16[1], r11
-; BE-NEXT:    vorr q9, q8, q8
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #192
-; BE-NEXT:    vmov.32 d17[1], r8
-; BE-NEXT:    vmov.32 d16[1], r5
-; BE-NEXT:    vorr q10, q8, q8
-; BE-NEXT:    vrev64.32 q8, q6
-; BE-NEXT:    vmov.32 d14[1], r6
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #240
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    vrev64.32 q8, q8
-; BE-NEXT:    vmov.32 d11[1], r7
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #224
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    vmov.32 d10[1], r1
-; BE-NEXT:    vrev64.32 q8, q8
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #56
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #136
-; BE-NEXT:    vrev64.32 q8, q8
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #104
-; BE-NEXT:    vrev64.32 q8, q9
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #88
-; BE-NEXT:    vrev64.32 q8, q10
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #72
-; BE-NEXT:    vrev64.32 q8, q7
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #208
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #56
-; BE-NEXT:    vrev64.32 d8, d17
-; BE-NEXT:    vrev64.32 q8, q5
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    vrev64.32 d9, d15
 ; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.f32 s0, s18
+; BE-NEXT:    mov r6, r1
 ; BE-NEXT:    vmov.32 d13[0], r0
 ; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #120
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEXT:    vmov.32 d13[1], r4
-; BE-NEXT:    vrev64.32 d8, d10
-; BE-NEXT:    vmov.32 d12[1], r1
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    vrev64.32 q6, q6
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    vmov.32 d15[1], r1
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    ldr r6, [sp, #156] @ 4-byte Reload
-; BE-NEXT:    vrev64.32 d8, d11
-; BE-NEXT:    add r5, r6, #64
-; BE-NEXT:    vmov.32 d14[1], r1
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    vrev64.32 q8, q7
-; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    vmov.32 d15[1], r1
-; BE-NEXT:    bl llrintf
-; BE-NEXT:    add lr, sp, #208
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-NEXT:    vmov.32 d14[1], r1
-; BE-NEXT:    vrev64.32 d8, d18
-; BE-NEXT:    vrev64.32 q8, q7
-; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    vmov.f32 s0, s19
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d14[0], r0
 ; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vldr d0, [sp, #64] @ 8-byte Reload
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    @ kill: def $s0 killed $s0 killed $d0
 ; BE-NEXT:    vmov.32 d15[0], r0
 ; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    add lr, sp, #160
-; BE-NEXT:    vmov.32 d15[1], r4
-; BE-NEXT:    vmov.32 d14[1], r1
-; BE-NEXT:    vrev64.32 q8, q7
-; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEXT:    vrev64.32 d8, d11
-; BE-NEXT:    vst1.64 {d12, d13}, [r5:128]
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add lr, sp, #40
+; BE-NEXT:    str r1, [sp, #60] @ 4-byte Spill
+; BE-NEXT:    vmov.32 d15[1], r7
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 d8, d16
 ; BE-NEXT:    vmov.f32 s0, s17
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    vmov.32 d14[1], r5
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    mov r10, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
 ; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    add lr, sp, #208
-; BE-NEXT:    vmov.32 d13[1], r4
-; BE-NEXT:    vmov.32 d12[1], r1
-; BE-NEXT:    vrev64.32 q8, q6
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #176
-; BE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-NEXT:    vrev64.32 d8, d12
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    mov r11, r1
+; BE-NEXT:    vmov.32 d13[1], r6
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 d8, d17
 ; BE-NEXT:    vmov.f32 s0, s17
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    vmov.32 d15[1], r1
+; BE-NEXT:    vmov.32 d12[1], r9
+; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    mov r8, r1
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
 ; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    mov r5, r6
-; BE-NEXT:    vrev64.32 d8, d13
-; BE-NEXT:    vmov.32 d14[1], r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #112
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #128
+; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 d8, d16
+; BE-NEXT:    vmov.32 d11[1], r0
 ; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    vrev64.32 q8, q7
-; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    vmov.f32 s0, s16
 ; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    vmov.32 d15[1], r1
+; BE-NEXT:    ldr r0, [sp, #92] @ 4-byte Reload
+; BE-NEXT:    add lr, sp, #128
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d10[1], r0
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
 ; BE-NEXT:    bl llrintf
+; BE-NEXT:    add lr, sp, #112
 ; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    vrev64.32 d8, d10
-; BE-NEXT:    vmov.32 d14[1], r1
+; BE-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #40
+; BE-NEXT:    vrev64.32 d8, d17
+; BE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
 ; BE-NEXT:    vmov.f32 s0, s17
-; BE-NEXT:    vrev64.32 q8, q7
-; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    vmov.32 d13[1], r0
 ; BE-NEXT:    bl llrintf
 ; BE-NEXT:    vmov.f32 s0, s16
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    ldr r0, [sp, #60] @ 4-byte Reload
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d12[1], r0
 ; BE-NEXT:    bl llrintf
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    add lr, sp, #208
-; BE-NEXT:    add r0, r6, #192
-; BE-NEXT:    vmov.32 d15[1], r4
-; BE-NEXT:    vmov.32 d14[1], r1
-; BE-NEXT:    vrev64.32 q8, q7
-; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #56
-; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #192
-; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #240
-; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #224
-; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #136
-; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-NEXT:    add r0, r6, #128
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #104
-; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add r0, r4, #64
 ; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #88
-; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vmov.32 d17[1], r10
+; BE-NEXT:    vmov.32 d16[1], r11
+; BE-NEXT:    vorr q12, q8, q8
 ; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #72
+; BE-NEXT:    add lr, sp, #128
+; BE-NEXT:    vmov.32 d15[1], r7
+; BE-NEXT:    vmov.32 d11[1], r6
+; BE-NEXT:    vmov.32 d14[1], r5
+; BE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    vmov.32 d10[1], r1
+; BE-NEXT:    vmov.32 d17[1], r8
+; BE-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    vmov.32 d16[1], r9
+; BE-NEXT:    vrev64.32 q14, q7
+; BE-NEXT:    vorr q13, q8, q8
+; BE-NEXT:    vrev64.32 q15, q5
+; BE-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 q8, q6
+; BE-NEXT:    vst1.64 {d28, d29}, [r0:128]!
+; BE-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-NEXT:    vrev64.32 q9, q9
+; BE-NEXT:    vrev64.32 q10, q10
 ; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-NEXT:    add sp, sp, #256
+; BE-NEXT:    vrev64.32 q11, q11
+; BE-NEXT:    vrev64.32 q12, q12
+; BE-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; BE-NEXT:    vst1.64 {d20, d21}, [r4:128]!
+; BE-NEXT:    vst1.64 {d22, d23}, [r4:128]!
+; BE-NEXT:    vrev64.32 q13, q13
+; BE-NEXT:    vst1.64 {d24, d25}, [r4:128]!
+; BE-NEXT:    vst1.64 {d26, d27}, [r4:128]
+; BE-NEXT:    add sp, sp, #144
 ; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-NEXT:    add sp, sp, #4
 ; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-NEON-LABEL: llrint_v32i64_v32f32:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    .pad #4
-; BE-NEON-NEXT:    sub sp, sp, #4
-; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    .pad #256
-; BE-NEON-NEXT:    sub sp, sp, #256
-; BE-NEON-NEXT:    add lr, sp, #208
-; BE-NEON-NEXT:    str r0, [sp, #156] @ 4-byte Spill
-; BE-NEON-NEXT:    add r0, sp, #408
-; BE-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #120
-; BE-NEON-NEXT:    vld1.64 {d10, d11}, [r0]
-; BE-NEON-NEXT:    add r0, sp, #392
-; BE-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #160
-; BE-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #176
-; BE-NEON-NEXT:    vrev64.32 d8, d10
-; BE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #136
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    vld1.64 {d12, d13}, [r0]
-; BE-NEON-NEXT:    add r0, sp, #360
-; BE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #192
-; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEON-NEXT:    add r0, sp, #376
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #40
-; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    str r1, [sp, #88] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vrev64.32 d9, d11
-; BE-NEON-NEXT:    add lr, sp, #240
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    str r1, [sp, #104] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.f32 s0, s18
-; BE-NEON-NEXT:    vrev64.32 d8, d13
-; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s19
-; BE-NEON-NEXT:    add lr, sp, #192
-; BE-NEON-NEXT:    str r1, [sp, #72] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d10, d16
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s20
-; BE-NEON-NEXT:    add lr, sp, #224
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s21
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    add lr, sp, #24
-; BE-NEON-NEXT:    mov r9, r1
-; BE-NEON-NEXT:    vmov.32 d15[1], r6
-; BE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #192
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d8, d17
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    vmov.32 d14[1], r7
-; BE-NEON-NEXT:    add lr, sp, #56
-; BE-NEON-NEXT:    mov r10, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    add lr, sp, #192
-; BE-NEON-NEXT:    mov r11, r1
-; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #40
-; BE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #224
-; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d8, d12
-; BE-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    vmov.32 d10[1], r5
-; BE-NEON-NEXT:    add lr, sp, #224
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vrev64.32 d8, d13
-; BE-NEON-NEXT:    add lr, sp, #8
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #240
-; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #240
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #136
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #24
-; BE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d8, d16
-; BE-NEON-NEXT:    vmov.32 d13[1], r0
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    vmov.32 d12[1], r9
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #192
-; BE-NEON-NEXT:    vmov.32 d15[1], r4
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #8
-; BE-NEON-NEXT:    vmov.32 d17[1], r10
-; BE-NEON-NEXT:    vmov.32 d16[1], r11
-; BE-NEON-NEXT:    vorr q9, q8, q8
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #192
-; BE-NEON-NEXT:    vmov.32 d17[1], r8
-; BE-NEON-NEXT:    vmov.32 d16[1], r5
-; BE-NEON-NEXT:    vorr q10, q8, q8
-; BE-NEON-NEXT:    vrev64.32 q8, q6
-; BE-NEON-NEXT:    vmov.32 d14[1], r6
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #240
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    vrev64.32 q8, q8
-; BE-NEON-NEXT:    vmov.32 d11[1], r7
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #224
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-NEON-NEXT:    vrev64.32 q8, q8
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #56
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #136
-; BE-NEON-NEXT:    vrev64.32 q8, q8
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #104
-; BE-NEON-NEXT:    vrev64.32 q8, q9
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #88
-; BE-NEON-NEXT:    vrev64.32 q8, q10
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #72
-; BE-NEON-NEXT:    vrev64.32 q8, q7
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #208
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #56
-; BE-NEON-NEXT:    vrev64.32 d8, d17
-; BE-NEON-NEXT:    vrev64.32 q8, q5
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #120
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEON-NEXT:    vmov.32 d13[1], r4
-; BE-NEON-NEXT:    vrev64.32 d8, d10
-; BE-NEON-NEXT:    vmov.32 d12[1], r1
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    vrev64.32 q6, q6
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    vmov.32 d15[1], r1
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    ldr r6, [sp, #156] @ 4-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d8, d11
-; BE-NEON-NEXT:    add r5, r6, #64
-; BE-NEON-NEXT:    vmov.32 d14[1], r1
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    vrev64.32 q8, q7
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    vmov.32 d15[1], r1
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    add lr, sp, #208
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-NEON-NEXT:    vmov.32 d14[1], r1
-; BE-NEON-NEXT:    vrev64.32 d8, d18
-; BE-NEON-NEXT:    vrev64.32 q8, q7
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    add lr, sp, #160
-; BE-NEON-NEXT:    vmov.32 d15[1], r4
-; BE-NEON-NEXT:    vmov.32 d14[1], r1
-; BE-NEON-NEXT:    vrev64.32 q8, q7
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d8, d11
-; BE-NEON-NEXT:    vst1.64 {d12, d13}, [r5:128]
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    add lr, sp, #208
-; BE-NEON-NEXT:    vmov.32 d13[1], r4
-; BE-NEON-NEXT:    vmov.32 d12[1], r1
-; BE-NEON-NEXT:    vrev64.32 q8, q6
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #176
-; BE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d8, d12
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    vmov.32 d15[1], r1
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    mov r5, r6
-; BE-NEON-NEXT:    vrev64.32 d8, d13
-; BE-NEON-NEXT:    vmov.32 d14[1], r1
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    vrev64.32 q8, q7
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    vmov.32 d15[1], r1
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    vrev64.32 d8, d10
-; BE-NEON-NEXT:    vmov.32 d14[1], r1
-; BE-NEON-NEXT:    vmov.f32 s0, s17
-; BE-NEON-NEXT:    vrev64.32 q8, q7
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.f32 s0, s16
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    bl llrintf
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    add lr, sp, #208
-; BE-NEON-NEXT:    add r0, r6, #192
-; BE-NEON-NEXT:    vmov.32 d15[1], r4
-; BE-NEON-NEXT:    vmov.32 d14[1], r1
-; BE-NEON-NEXT:    vrev64.32 q8, q7
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #56
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #192
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #240
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #224
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #136
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-NEON-NEXT:    add r0, r6, #128
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #104
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #88
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #72
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-NEON-NEXT:    add sp, sp, #256
-; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    add sp, sp, #4
-; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-  %a = call <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float> %x)
-  ret <32 x i64> %a
+  %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
+  ret <16 x i64> %a
 }
-declare <32 x i64> @llvm.llrint.v32i64.v32f32(<32 x float>)
+declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
 
 define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
 ; LE-LABEL: llrint_v1i64_v1f64:
@@ -5681,15 +1413,6 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
 ; LE-NEXT:    vmov.32 d0[1], r1
 ; LE-NEXT:    pop {r11, pc}
 ;
-; LE-NEON-LABEL: llrint_v1i64_v1f64:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r11, lr}
-; LE-NEON-NEXT:    push {r11, lr}
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d0[0], r0
-; LE-NEON-NEXT:    vmov.32 d0[1], r1
-; LE-NEON-NEXT:    pop {r11, pc}
-;
 ; BE-LABEL: llrint_v1i64_v1f64:
 ; BE:       @ %bb.0:
 ; BE-NEXT:    .save {r11, lr}
@@ -5699,16 +1422,6 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
 ; BE-NEXT:    vmov.32 d16[1], r1
 ; BE-NEXT:    vrev64.32 d0, d16
 ; BE-NEXT:    pop {r11, pc}
-;
-; BE-NEON-LABEL: llrint_v1i64_v1f64:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r11, lr}
-; BE-NEON-NEXT:    push {r11, lr}
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-NEON-NEXT:    vrev64.32 d0, d16
-; BE-NEON-NEXT:    pop {r11, pc}
   %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x)
   ret <1 x i64> %a
 }
@@ -5735,26 +1448,6 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
 ; LE-NEXT:    vpop {d8, d9, d10, d11}
 ; LE-NEXT:    pop {r4, pc}
 ;
-; LE-NEON-LABEL: llrint_v2i64_v2f64:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, lr}
-; LE-NEON-NEXT:    push {r4, lr}
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11}
-; LE-NEON-NEXT:    vorr q4, q0, q0
-; LE-NEON-NEXT:    vorr d0, d9, d9
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d8, d8
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    vmov.32 d11[1], r4
-; LE-NEON-NEXT:    vmov.32 d10[1], r1
-; LE-NEON-NEXT:    vorr q0, q5, q5
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11}
-; LE-NEON-NEXT:    pop {r4, pc}
-;
 ; BE-LABEL: llrint_v2i64_v2f64:
 ; BE:       @ %bb.0:
 ; BE-NEXT:    .save {r4, lr}
@@ -5774,26 +1467,6 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
 ; BE-NEXT:    vrev64.32 q0, q5
 ; BE-NEXT:    vpop {d8, d9, d10, d11}
 ; BE-NEXT:    pop {r4, pc}
-;
-; BE-NEON-LABEL: llrint_v2i64_v2f64:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, lr}
-; BE-NEON-NEXT:    push {r4, lr}
-; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11}
-; BE-NEON-NEXT:    vpush {d8, d9, d10, d11}
-; BE-NEON-NEXT:    vorr q4, q0, q0
-; BE-NEON-NEXT:    vorr d0, d9, d9
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d8, d8
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-NEON-NEXT:    vrev64.32 q0, q5
-; BE-NEON-NEXT:    vpop {d8, d9, d10, d11}
-; BE-NEON-NEXT:    pop {r4, pc}
   %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x)
   ret <2 x i64> %a
 }
@@ -5832,38 +1505,6 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
 ; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-NEXT:    pop {r4, r5, r6, pc}
 ;
-; LE-NEON-LABEL: llrint_v4i64_v4f64:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, lr}
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    vorr q5, q1, q1
-; LE-NEON-NEXT:    vorr q6, q0, q0
-; LE-NEON-NEXT:    vorr d0, d11, d11
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d12, d12
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d13, d13
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d10, d10
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    vmov.32 d15[1], r6
-; LE-NEON-NEXT:    vmov.32 d9[1], r4
-; LE-NEON-NEXT:    vmov.32 d14[1], r5
-; LE-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-NEON-NEXT:    vorr q0, q7, q7
-; LE-NEON-NEXT:    vorr q1, q4, q4
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    pop {r4, r5, r6, pc}
-;
 ; BE-LABEL: llrint_v4i64_v4f64:
 ; BE:       @ %bb.0:
 ; BE-NEXT:    .save {r4, r5, r6, lr}
@@ -5883,1039 +1524,161 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
 ; BE-NEXT:    vmov.32 d14[0], r0
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    vorr d0, d8, d8
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    vmov.32 d15[1], r6
-; BE-NEXT:    vmov.32 d13[1], r4
-; BE-NEXT:    vmov.32 d14[1], r5
-; BE-NEXT:    vmov.32 d12[1], r1
-; BE-NEXT:    vrev64.32 q0, q7
-; BE-NEXT:    vrev64.32 q1, q6
-; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    pop {r4, r5, r6, pc}
-;
-; BE-NEON-LABEL: llrint_v4i64_v4f64:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, lr}
-; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    vorr q4, q1, q1
-; BE-NEON-NEXT:    vorr q5, q0, q0
-; BE-NEON-NEXT:    vorr d0, d9, d9
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d10, d10
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d11, d11
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d8, d8
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    vmov.32 d15[1], r6
-; BE-NEON-NEXT:    vmov.32 d13[1], r4
-; BE-NEON-NEXT:    vmov.32 d14[1], r5
-; BE-NEON-NEXT:    vmov.32 d12[1], r1
-; BE-NEON-NEXT:    vrev64.32 q0, q7
-; BE-NEON-NEXT:    vrev64.32 q1, q6
-; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    pop {r4, r5, r6, pc}
-  %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x)
-  ret <4 x i64> %a
-}
-declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
-
-define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
-; LE-LABEL: llrint_v8i64_v8f64:
-; LE:       @ %bb.0:
-; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    .pad #40
-; LE-NEXT:    sub sp, sp, #40
-; LE-NEXT:    vorr q4, q0, q0
-; LE-NEXT:    add lr, sp, #24
-; LE-NEXT:    vorr d0, d7, d7
-; LE-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; LE-NEXT:    vorr q7, q2, q2
-; LE-NEXT:    vorr q6, q1, q1
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d14, d14
-; LE-NEXT:    add lr, sp, #8
-; LE-NEXT:    vmov.32 d17[0], r0
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d15, d15
-; LE-NEXT:    mov r9, r1
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d12, d12
-; LE-NEXT:    mov r10, r1
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d13, d13
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d8, d8
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d9, d9
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    add lr, sp, #24
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    add lr, sp, #8
-; LE-NEXT:    vmov.32 d13[1], r6
-; LE-NEXT:    vldmia lr, {d6, d7} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d15[1], r4
-; LE-NEXT:    vmov.32 d11[1], r10
-; LE-NEXT:    vmov.32 d6[0], r0
-; LE-NEXT:    vmov.32 d12[1], r5
-; LE-NEXT:    vmov.32 d14[1], r7
-; LE-NEXT:    vorr q0, q6, q6
-; LE-NEXT:    vmov.32 d10[1], r9
-; LE-NEXT:    vorr q1, q7, q7
-; LE-NEXT:    vmov.32 d7[1], r8
-; LE-NEXT:    vorr q2, q5, q5
-; LE-NEXT:    vmov.32 d6[1], r1
-; LE-NEXT:    add sp, sp, #40
-; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; LE-NEON-LABEL: llrint_v8i64_v8f64:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    .pad #40
-; LE-NEON-NEXT:    sub sp, sp, #40
-; LE-NEON-NEXT:    vorr q4, q0, q0
-; LE-NEON-NEXT:    add lr, sp, #24
-; LE-NEON-NEXT:    vorr d0, d7, d7
-; LE-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; LE-NEON-NEXT:    vorr q7, q2, q2
-; LE-NEON-NEXT:    vorr q6, q1, q1
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d14, d14
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d15, d15
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d12, d12
-; LE-NEON-NEXT:    mov r10, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d13, d13
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d8, d8
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d9, d9
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #24
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    vmov.32 d13[1], r6
-; LE-NEON-NEXT:    vldmia lr, {d6, d7} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d15[1], r4
-; LE-NEON-NEXT:    vmov.32 d11[1], r10
-; LE-NEON-NEXT:    vmov.32 d6[0], r0
-; LE-NEON-NEXT:    vmov.32 d12[1], r5
-; LE-NEON-NEXT:    vmov.32 d14[1], r7
-; LE-NEON-NEXT:    vorr q0, q6, q6
-; LE-NEON-NEXT:    vmov.32 d10[1], r9
-; LE-NEON-NEXT:    vorr q1, q7, q7
-; LE-NEON-NEXT:    vmov.32 d7[1], r8
-; LE-NEON-NEXT:    vorr q2, q5, q5
-; LE-NEON-NEXT:    vmov.32 d6[1], r1
-; LE-NEON-NEXT:    add sp, sp, #40
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; BE-LABEL: llrint_v8i64_v8f64:
-; BE:       @ %bb.0:
-; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    .pad #40
-; BE-NEXT:    sub sp, sp, #40
-; BE-NEXT:    vorr q4, q0, q0
-; BE-NEXT:    add lr, sp, #24
-; BE-NEXT:    vorr d0, d7, d7
-; BE-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; BE-NEXT:    vorr q7, q2, q2
-; BE-NEXT:    vorr q6, q1, q1
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d14, d14
-; BE-NEXT:    add lr, sp, #8
-; BE-NEXT:    vmov.32 d17[0], r0
-; BE-NEXT:    mov r8, r1
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d15, d15
-; BE-NEXT:    mov r9, r1
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d12, d12
-; BE-NEXT:    mov r10, r1
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d13, d13
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d8, d8
-; BE-NEXT:    mov r4, r1
+; BE-NEXT:    mov r6, r1
 ; BE-NEXT:    vmov.32 d15[0], r0
 ; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d9, d9
-; BE-NEXT:    mov r5, r1
 ; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    add lr, sp, #24
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    add lr, sp, #8
-; BE-NEXT:    vmov.32 d13[1], r6
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    vmov.32 d15[1], r4
-; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    vmov.32 d11[1], r10
-; BE-NEXT:    vmov.32 d17[1], r8
-; BE-NEXT:    vmov.32 d12[1], r5
-; BE-NEXT:    vmov.32 d14[1], r7
-; BE-NEXT:    vmov.32 d10[1], r9
-; BE-NEXT:    vmov.32 d16[1], r1
-; BE-NEXT:    vrev64.32 q0, q6
-; BE-NEXT:    vrev64.32 q1, q7
-; BE-NEXT:    vrev64.32 q2, q5
-; BE-NEXT:    vrev64.32 q3, q8
-; BE-NEXT:    add sp, sp, #40
+; BE-NEXT:    vmov.32 d15[1], r6
+; BE-NEXT:    vmov.32 d13[1], r4
+; BE-NEXT:    vmov.32 d14[1], r5
+; BE-NEXT:    vmov.32 d12[1], r1
+; BE-NEXT:    vrev64.32 q0, q7
+; BE-NEXT:    vrev64.32 q1, q6
 ; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; BE-NEON-LABEL: llrint_v8i64_v8f64:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    .pad #40
-; BE-NEON-NEXT:    sub sp, sp, #40
-; BE-NEON-NEXT:    vorr q4, q0, q0
-; BE-NEON-NEXT:    add lr, sp, #24
-; BE-NEON-NEXT:    vorr d0, d7, d7
-; BE-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; BE-NEON-NEXT:    vorr q7, q2, q2
-; BE-NEON-NEXT:    vorr q6, q1, q1
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d14, d14
-; BE-NEON-NEXT:    add lr, sp, #8
-; BE-NEON-NEXT:    vmov.32 d17[0], r0
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d15, d15
-; BE-NEON-NEXT:    mov r9, r1
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d12, d12
-; BE-NEON-NEXT:    mov r10, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d13, d13
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d8, d8
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d9, d9
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    add lr, sp, #24
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    add lr, sp, #8
-; BE-NEON-NEXT:    vmov.32 d13[1], r6
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    vmov.32 d15[1], r4
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov.32 d11[1], r10
-; BE-NEON-NEXT:    vmov.32 d17[1], r8
-; BE-NEON-NEXT:    vmov.32 d12[1], r5
-; BE-NEON-NEXT:    vmov.32 d14[1], r7
-; BE-NEON-NEXT:    vmov.32 d10[1], r9
-; BE-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-NEON-NEXT:    vrev64.32 q0, q6
-; BE-NEON-NEXT:    vrev64.32 q1, q7
-; BE-NEON-NEXT:    vrev64.32 q2, q5
-; BE-NEON-NEXT:    vrev64.32 q3, q8
-; BE-NEON-NEXT:    add sp, sp, #40
-; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-  %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x)
-  ret <8 x i64> %a
+; BE-NEXT:    pop {r4, r5, r6, pc}
+  %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x)
+  ret <4 x i64> %a
 }
-declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
+declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
 
-define <16 x i64> @llrint_v16f64(<16 x double> %x) {
-; LE-LABEL: llrint_v16f64:
+define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
+; LE-LABEL: llrint_v8i64_v8f64:
 ; LE:       @ %bb.0:
-; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEXT:    .pad #4
-; LE-NEXT:    sub sp, sp, #4
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
 ; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    .pad #176
-; LE-NEXT:    sub sp, sp, #176
-; LE-NEXT:    add lr, sp, #40
-; LE-NEXT:    str r0, [sp, #140] @ 4-byte Spill
-; LE-NEXT:    add r0, sp, #312
-; LE-NEXT:    vorr q6, q2, q2
+; LE-NEXT:    .pad #40
+; LE-NEXT:    sub sp, sp, #40
+; LE-NEXT:    vorr q4, q0, q0
+; LE-NEXT:    add lr, sp, #24
+; LE-NEXT:    vorr d0, d7, d7
 ; LE-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #96
-; LE-NEXT:    vorr q7, q1, q1
-; LE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #144
-; LE-NEXT:    vorr d0, d1, d1
-; LE-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEXT:    add r0, sp, #280
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #80
-; LE-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEXT:    add r0, sp, #296
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #120
-; LE-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEXT:    add r0, sp, #328
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #56
-; LE-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-NEXT:    vorr q7, q2, q2
+; LE-NEXT:    vorr q6, q1, q1
 ; LE-NEXT:    bl llrint
 ; LE-NEXT:    vorr d0, d14, d14
-; LE-NEXT:    str r1, [sp, #116] @ 4-byte Spill
-; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    vmov.32 d17[0], r0
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; LE-NEXT:    bl llrint
 ; LE-NEXT:    vorr d0, d15, d15
-; LE-NEXT:    str r1, [sp, #76] @ 4-byte Spill
-; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d10[0], r0
 ; LE-NEXT:    bl llrint
 ; LE-NEXT:    vorr d0, d12, d12
-; LE-NEXT:    add lr, sp, #160
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    str r1, [sp, #72] @ 4-byte Spill
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov.32 d11[0], r0
 ; LE-NEXT:    bl llrint
 ; LE-NEXT:    vorr d0, d13, d13
-; LE-NEXT:    mov r6, r1
+; LE-NEXT:    mov r7, r1
 ; LE-NEXT:    vmov.32 d14[0], r0
 ; LE-NEXT:    bl llrint
-; LE-NEXT:    add lr, sp, #40
+; LE-NEXT:    vorr d0, d8, d8
 ; LE-NEXT:    mov r4, r1
 ; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d8, d8
 ; LE-NEXT:    bl llrint
 ; LE-NEXT:    vorr d0, d9, d9
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    add lr, sp, #96
 ; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    add lr, sp, #40
-; LE-NEXT:    mov r10, r1
-; LE-NEXT:    vmov.32 d13[1], r5
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #56
-; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d9, d9
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d8, d8
-; LE-NEXT:    vmov.32 d12[1], r7
-; LE-NEXT:    add lr, sp, #96
-; LE-NEXT:    mov r9, r1
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    vmov.32 d12[0], r0
 ; LE-NEXT:    bl llrint
-; LE-NEXT:    vmov.32 d10[0], r0
 ; LE-NEXT:    add lr, sp, #24
-; LE-NEXT:    mov r11, r1
-; LE-NEXT:    vmov.32 d15[1], r4
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #144
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d17, d17
-; LE-NEXT:    bl llrint
-; LE-NEXT:    add lr, sp, #8
-; LE-NEXT:    vmov.32 d14[1], r6
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vmov.32 d17[0], r0
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #56
-; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #80
-; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d11, d11
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    add lr, sp, #160
-; LE-NEXT:    vorr d0, d10, d10
-; LE-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
-; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vmov.32 d9[1], r0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
-; LE-NEXT:    add lr, sp, #160
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d8[1], r0
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #120
-; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d11, d11
-; LE-NEXT:    bl llrint
 ; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    add lr, sp, #40
-; LE-NEXT:    vorr d0, d10, d10
-; LE-NEXT:    ldr r0, [sp, #116] @ 4-byte Reload
-; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d9[1], r0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    add lr, sp, #144
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d12[0], r0
 ; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
 ; LE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-NEXT:    vmov.32 d8[1], r10
 ; LE-NEXT:    bl llrint
 ; LE-NEXT:    add lr, sp, #8
-; LE-NEXT:    vmov.32 d15[1], r6
-; LE-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #24
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #160
-; LE-NEXT:    vmov.32 d20[0], r0
-; LE-NEXT:    vmov.32 d21[1], r8
-; LE-NEXT:    vmov.32 d20[1], r1
-; LE-NEXT:    ldr r1, [sp, #140] @ 4-byte Reload
-; LE-NEXT:    vmov.32 d13[1], r5
-; LE-NEXT:    mov r0, r1
-; LE-NEXT:    vst1.64 {d8, d9}, [r0:128]!
-; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #56
-; LE-NEXT:    vmov.32 d14[1], r4
-; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #96
-; LE-NEXT:    vmov.32 d12[1], r7
-; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d17[1], r9
-; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; LE-NEXT:    add r0, r1, #64
-; LE-NEXT:    vst1.64 {d14, d15}, [r0:128]!
-; LE-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-NEXT:    vmov.32 d16[1], r11
-; LE-NEXT:    vst1.64 {d20, d21}, [r0:128]!
-; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-NEXT:    add sp, sp, #176
+; LE-NEXT:    vmov.32 d13[1], r6
+; LE-NEXT:    vldmia lr, {d6, d7} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d15[1], r4
+; LE-NEXT:    vmov.32 d11[1], r10
+; LE-NEXT:    vmov.32 d6[0], r0
+; LE-NEXT:    vmov.32 d12[1], r5
+; LE-NEXT:    vmov.32 d14[1], r7
+; LE-NEXT:    vorr q0, q6, q6
+; LE-NEXT:    vmov.32 d10[1], r9
+; LE-NEXT:    vorr q1, q7, q7
+; LE-NEXT:    vmov.32 d7[1], r8
+; LE-NEXT:    vorr q2, q5, q5
+; LE-NEXT:    vmov.32 d6[1], r1
+; LE-NEXT:    add sp, sp, #40
 ; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    add sp, sp, #4
-; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; LE-NEON-LABEL: llrint_v16f64:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    .pad #4
-; LE-NEON-NEXT:    sub sp, sp, #4
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    .pad #176
-; LE-NEON-NEXT:    sub sp, sp, #176
-; LE-NEON-NEXT:    add lr, sp, #40
-; LE-NEON-NEXT:    str r0, [sp, #140] @ 4-byte Spill
-; LE-NEON-NEXT:    add r0, sp, #312
-; LE-NEON-NEXT:    vorr q6, q2, q2
-; LE-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #96
-; LE-NEON-NEXT:    vorr q7, q1, q1
-; LE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #144
-; LE-NEON-NEXT:    vorr d0, d1, d1
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEON-NEXT:    add r0, sp, #280
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #80
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEON-NEXT:    add r0, sp, #296
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #120
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEON-NEXT:    add r0, sp, #328
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d14, d14
-; LE-NEON-NEXT:    str r1, [sp, #116] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d15, d15
-; LE-NEON-NEXT:    str r1, [sp, #76] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d12, d12
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    str r1, [sp, #72] @ 4-byte Spill
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d13, d13
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #40
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d8, d8
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d9, d9
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #96
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    add lr, sp, #40
-; LE-NEON-NEXT:    mov r10, r1
-; LE-NEON-NEXT:    vmov.32 d13[1], r5
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d9, d9
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d8, d8
-; LE-NEON-NEXT:    vmov.32 d12[1], r7
-; LE-NEON-NEXT:    add lr, sp, #96
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    add lr, sp, #24
-; LE-NEON-NEXT:    mov r11, r1
-; LE-NEON-NEXT:    vmov.32 d15[1], r4
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #144
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d17, d17
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    vmov.32 d14[1], r6
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #80
-; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d11, d11
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    vorr d0, d10, d10
-; LE-NEON-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d8[1], r0
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #120
-; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d11, d11
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    add lr, sp, #40
-; LE-NEON-NEXT:    vorr d0, d10, d10
-; LE-NEON-NEXT:    ldr r0, [sp, #116] @ 4-byte Reload
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #144
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-NEON-NEXT:    vmov.32 d8[1], r10
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    vmov.32 d15[1], r6
-; LE-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #24
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    vmov.32 d20[0], r0
-; LE-NEON-NEXT:    vmov.32 d21[1], r8
-; LE-NEON-NEXT:    vmov.32 d20[1], r1
-; LE-NEON-NEXT:    ldr r1, [sp, #140] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov.32 d13[1], r5
-; LE-NEON-NEXT:    mov r0, r1
-; LE-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
-; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    vmov.32 d14[1], r4
-; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #96
-; LE-NEON-NEXT:    vmov.32 d12[1], r7
-; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d17[1], r9
-; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; LE-NEON-NEXT:    add r0, r1, #64
-; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-NEON-NEXT:    vmov.32 d16[1], r11
-; LE-NEON-NEXT:    vst1.64 {d20, d21}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-NEON-NEXT:    add sp, sp, #176
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    add sp, sp, #4
-; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 ;
-; BE-LABEL: llrint_v16f64:
+; BE-LABEL: llrint_v8i64_v8f64:
 ; BE:       @ %bb.0:
-; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEXT:    .pad #4
-; BE-NEXT:    sub sp, sp, #4
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
 ; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    .pad #168
-; BE-NEXT:    sub sp, sp, #168
-; BE-NEXT:    add lr, sp, #64
-; BE-NEXT:    str r0, [sp, #132] @ 4-byte Spill
-; BE-NEXT:    add r0, sp, #304
-; BE-NEXT:    vorr q4, q3, q3
-; BE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #48
-; BE-NEXT:    vorr d0, d1, d1
-; BE-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEXT:    add r0, sp, #320
-; BE-NEXT:    vorr q6, q2, q2
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #88
-; BE-NEXT:    vorr q7, q1, q1
-; BE-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEXT:    add r0, sp, #272
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #112
-; BE-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEXT:    add r0, sp, #288
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    .pad #40
+; BE-NEXT:    sub sp, sp, #40
+; BE-NEXT:    vorr q4, q0, q0
 ; BE-NEXT:    add lr, sp, #24
-; BE-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    vorr d0, d7, d7
+; BE-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-NEXT:    vorr q7, q2, q2
+; BE-NEXT:    vorr q6, q1, q1
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    vorr d0, d14, d14
-; BE-NEXT:    add lr, sp, #136
+; BE-NEXT:    add lr, sp, #8
 ; BE-NEXT:    vmov.32 d17[0], r0
-; BE-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; BE-NEXT:    mov r8, r1
 ; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    vorr d0, d15, d15
-; BE-NEXT:    str r1, [sp, #84] @ 4-byte Spill
+; BE-NEXT:    mov r9, r1
 ; BE-NEXT:    vmov.32 d10[0], r0
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    vorr d0, d12, d12
-; BE-NEXT:    add lr, sp, #152
+; BE-NEXT:    mov r10, r1
 ; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    str r1, [sp, #44] @ 4-byte Spill
-; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    vorr d0, d13, d13
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d8, d8
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d9, d9
 ; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    add lr, sp, #64
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    add lr, sp, #136
-; BE-NEXT:    mov r9, r1
-; BE-NEXT:    vmov.32 d13[1], r5
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #24
-; BE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-NEXT:    vorr d0, d9, d9
+; BE-NEXT:    vmov.32 d14[0], r0
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    vorr d0, d8, d8
-; BE-NEXT:    vmov.32 d12[1], r7
-; BE-NEXT:    add lr, sp, #64
-; BE-NEXT:    mov r10, r1
+; BE-NEXT:    mov r4, r1
 ; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
 ; BE-NEXT:    bl llrint
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    add lr, sp, #8
-; BE-NEXT:    mov r11, r1
-; BE-NEXT:    vmov.32 d11[1], r4
-; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #48
-; BE-NEXT:    vorr q6, q5, q5
-; BE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; BE-NEXT:    vorr d0, d9, d9
+; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d12[0], r0
 ; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d8, d8
-; BE-NEXT:    vmov.32 d12[1], r6
 ; BE-NEXT:    add lr, sp, #24
-; BE-NEXT:    mov r8, r1
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    add lr, sp, #48
-; BE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
 ; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #152
-; BE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #88
-; BE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-NEXT:    vorr d0, d13, d13
-; BE-NEXT:    vmov.32 d9[1], r0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
-; BE-NEXT:    vorr d0, d12, d12
-; BE-NEXT:    add lr, sp, #152
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d8[1], r0
-; BE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-NEXT:    bl llrint
-; BE-NEXT:    add lr, sp, #136
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #112
-; BE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-NEXT:    vorr d0, d9, d9
-; BE-NEXT:    vmov.32 d11[1], r0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d8, d8
-; BE-NEXT:    mov r7, r1
 ; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    vmov.32 d10[1], r9
+; BE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    add lr, sp, #8
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #48
-; BE-NEXT:    vmov.32 d17[1], r10
-; BE-NEXT:    vmov.32 d16[1], r11
-; BE-NEXT:    vorr q12, q8, q8
+; BE-NEXT:    vmov.32 d13[1], r6
 ; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #152
-; BE-NEXT:    vmov.32 d17[1], r8
-; BE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #24
-; BE-NEXT:    vmov.32 d13[1], r7
-; BE-NEXT:    vmov.32 d16[1], r6
-; BE-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #64
-; BE-NEXT:    vorr q13, q8, q8
-; BE-NEXT:    vmov.32 d12[1], r1
-; BE-NEXT:    ldr r1, [sp, #132] @ 4-byte Reload
-; BE-NEXT:    vrev64.32 q8, q5
-; BE-NEXT:    mov r0, r1
-; BE-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
-; BE-NEXT:    vrev64.32 q9, q9
-; BE-NEXT:    vrev64.32 q10, q10
-; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; BE-NEXT:    vrev64.32 q11, q11
 ; BE-NEXT:    vmov.32 d15[1], r4
-; BE-NEXT:    vst1.64 {d20, d21}, [r0:128]!
-; BE-NEXT:    vrev64.32 q15, q6
-; BE-NEXT:    vmov.32 d14[1], r5
-; BE-NEXT:    vrev64.32 q12, q12
-; BE-NEXT:    vst1.64 {d22, d23}, [r0:128]
-; BE-NEXT:    add r0, r1, #64
-; BE-NEXT:    vrev64.32 q13, q13
-; BE-NEXT:    vst1.64 {d30, d31}, [r0:128]!
-; BE-NEXT:    vst1.64 {d24, d25}, [r0:128]!
-; BE-NEXT:    vrev64.32 q14, q7
-; BE-NEXT:    vst1.64 {d26, d27}, [r0:128]!
-; BE-NEXT:    vst1.64 {d28, d29}, [r0:128]
-; BE-NEXT:    add sp, sp, #168
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov.32 d11[1], r10
+; BE-NEXT:    vmov.32 d17[1], r8
+; BE-NEXT:    vmov.32 d12[1], r5
+; BE-NEXT:    vmov.32 d14[1], r7
+; BE-NEXT:    vmov.32 d10[1], r9
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 q0, q6
+; BE-NEXT:    vrev64.32 q1, q7
+; BE-NEXT:    vrev64.32 q2, q5
+; BE-NEXT:    vrev64.32 q3, q8
+; BE-NEXT:    add sp, sp, #40
 ; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    add sp, sp, #4
-; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-NEON-LABEL: llrint_v16f64:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    .pad #4
-; BE-NEON-NEXT:    sub sp, sp, #4
-; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    .pad #168
-; BE-NEON-NEXT:    sub sp, sp, #168
-; BE-NEON-NEXT:    add lr, sp, #64
-; BE-NEON-NEXT:    str r0, [sp, #132] @ 4-byte Spill
-; BE-NEON-NEXT:    add r0, sp, #304
-; BE-NEON-NEXT:    vorr q4, q3, q3
-; BE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #48
-; BE-NEON-NEXT:    vorr d0, d1, d1
-; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEON-NEXT:    add r0, sp, #320
-; BE-NEON-NEXT:    vorr q6, q2, q2
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #88
-; BE-NEON-NEXT:    vorr q7, q1, q1
-; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEON-NEXT:    add r0, sp, #272
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #112
-; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEON-NEXT:    add r0, sp, #288
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #24
-; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d14, d14
-; BE-NEON-NEXT:    add lr, sp, #136
-; BE-NEON-NEXT:    vmov.32 d17[0], r0
-; BE-NEON-NEXT:    str r1, [sp, #108] @ 4-byte Spill
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d15, d15
-; BE-NEON-NEXT:    str r1, [sp, #84] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d12, d12
-; BE-NEON-NEXT:    add lr, sp, #152
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
-; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d13, d13
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d8, d8
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d9, d9
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    add lr, sp, #64
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    add lr, sp, #136
-; BE-NEON-NEXT:    mov r9, r1
-; BE-NEON-NEXT:    vmov.32 d13[1], r5
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #24
-; BE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-NEON-NEXT:    vorr d0, d9, d9
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d8, d8
-; BE-NEON-NEXT:    vmov.32 d12[1], r7
-; BE-NEON-NEXT:    add lr, sp, #64
-; BE-NEON-NEXT:    mov r10, r1
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    add lr, sp, #8
-; BE-NEON-NEXT:    mov r11, r1
-; BE-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #48
-; BE-NEON-NEXT:    vorr q6, q5, q5
-; BE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-NEON-NEXT:    vorr d0, d9, d9
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d8, d8
-; BE-NEON-NEXT:    vmov.32 d12[1], r6
-; BE-NEON-NEXT:    add lr, sp, #24
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    add lr, sp, #48
-; BE-NEON-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #152
-; BE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #88
-; BE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-NEON-NEXT:    vorr d0, d13, d13
-; BE-NEON-NEXT:    vmov.32 d9[1], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
-; BE-NEON-NEXT:    vorr d0, d12, d12
-; BE-NEON-NEXT:    add lr, sp, #152
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    add lr, sp, #136
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #112
-; BE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-NEON-NEXT:    vorr d0, d9, d9
-; BE-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d8, d8
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    vmov.32 d10[1], r9
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    add lr, sp, #8
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #48
-; BE-NEON-NEXT:    vmov.32 d17[1], r10
-; BE-NEON-NEXT:    vmov.32 d16[1], r11
-; BE-NEON-NEXT:    vorr q12, q8, q8
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #152
-; BE-NEON-NEXT:    vmov.32 d17[1], r8
-; BE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #24
-; BE-NEON-NEXT:    vmov.32 d13[1], r7
-; BE-NEON-NEXT:    vmov.32 d16[1], r6
-; BE-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #64
-; BE-NEON-NEXT:    vorr q13, q8, q8
-; BE-NEON-NEXT:    vmov.32 d12[1], r1
-; BE-NEON-NEXT:    ldr r1, [sp, #132] @ 4-byte Reload
-; BE-NEON-NEXT:    vrev64.32 q8, q5
-; BE-NEON-NEXT:    mov r0, r1
-; BE-NEON-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
-; BE-NEON-NEXT:    vrev64.32 q9, q9
-; BE-NEON-NEXT:    vrev64.32 q10, q10
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; BE-NEON-NEXT:    vrev64.32 q11, q11
-; BE-NEON-NEXT:    vmov.32 d15[1], r4
-; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r0:128]!
-; BE-NEON-NEXT:    vrev64.32 q15, q6
-; BE-NEON-NEXT:    vmov.32 d14[1], r5
-; BE-NEON-NEXT:    vrev64.32 q12, q12
-; BE-NEON-NEXT:    vst1.64 {d22, d23}, [r0:128]
-; BE-NEON-NEXT:    add r0, r1, #64
-; BE-NEON-NEXT:    vrev64.32 q13, q13
-; BE-NEON-NEXT:    vst1.64 {d30, d31}, [r0:128]!
-; BE-NEON-NEXT:    vst1.64 {d24, d25}, [r0:128]!
-; BE-NEON-NEXT:    vrev64.32 q14, q7
-; BE-NEON-NEXT:    vst1.64 {d26, d27}, [r0:128]!
-; BE-NEON-NEXT:    vst1.64 {d28, d29}, [r0:128]
-; BE-NEON-NEXT:    add sp, sp, #168
-; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    add sp, sp, #4
-; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-  %a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x)
-  ret <16 x i64> %a
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+  %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x)
+  ret <8 x i64> %a
 }
-declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>)
+declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
 
-define <32 x i64> @llrint_v32f64(<32 x double> %x) {
-; LE-LABEL: llrint_v32f64:
+define <16 x i64> @llrint_v16f64(<16 x double> %x) {
+; LE-LABEL: llrint_v16f64:
 ; LE:       @ %bb.0:
 ; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -6923,691 +1686,172 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) {
 ; LE-NEXT:    sub sp, sp, #4
 ; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    .pad #208
-; LE-NEXT:    sub sp, sp, #208
-; LE-NEXT:    add lr, sp, #72
-; LE-NEXT:    str r0, [sp, #156] @ 4-byte Spill
-; LE-NEXT:    add r0, sp, #456
-; LE-NEXT:    vorr q4, q0, q0
-; LE-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #56
-; LE-NEXT:    vorr d0, d7, d7
-; LE-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #104
-; LE-NEXT:    vorr q5, q2, q2
-; LE-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEXT:    add r0, sp, #344
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #192
-; LE-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEXT:    add r0, sp, #376
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #160
-; LE-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEXT:    add r0, sp, #360
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #136
-; LE-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEXT:    add r0, sp, #440
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #176
-; LE-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d10, d10
-; LE-NEXT:    str r1, [sp, #120] @ 4-byte Spill
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d11, d11
-; LE-NEXT:    mov r10, r1
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d8, d8
-; LE-NEXT:    add lr, sp, #88
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    mov r11, r1
-; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d9, d9
-; LE-NEXT:    mov r9, r1
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    add lr, sp, #40
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #56
-; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d10, d10
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d11, d11
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    add lr, sp, #72
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    add lr, sp, #56
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d9[1], r7
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #176
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d17, d17
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vmov.32 d8[1], r4
-; LE-NEXT:    add lr, sp, #72
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #104
-; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d9, d9
-; LE-NEXT:    bl llrint
+; LE-NEXT:    .pad #176
+; LE-NEXT:    sub sp, sp, #176
 ; LE-NEXT:    add lr, sp, #40
-; LE-NEXT:    vorr d0, d8, d8
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d11[1], r6
-; LE-NEXT:    bl llrint
-; LE-NEXT:    add lr, sp, #176
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-NEXT:    vmov.32 d10[1], r9
-; LE-NEXT:    bl llrint
-; LE-NEXT:    add lr, sp, #88
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    ldr r0, [sp, #120] @ 4-byte Reload
-; LE-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #56
-; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #24
-; LE-NEXT:    vmov.32 d19[1], r0
-; LE-NEXT:    add r0, sp, #408
-; LE-NEXT:    ldr r2, [sp, #156] @ 4-byte Reload
+; LE-NEXT:    str r0, [sp, #140] @ 4-byte Spill
+; LE-NEXT:    add r0, sp, #312
+; LE-NEXT:    vorr q6, q2, q2
+; LE-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #96
+; LE-NEXT:    vorr q7, q1, q1
+; LE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    vorr d0, d1, d1
 ; LE-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEXT:    vmov.32 d13[1], r7
-; LE-NEXT:    mov r0, r2
-; LE-NEXT:    vmov.32 d12[1], r1
-; LE-NEXT:    add r1, sp, #488
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #40
-; LE-NEXT:    vld1.64 {d16, d17}, [r1]
-; LE-NEXT:    add r1, sp, #472
-; LE-NEXT:    vst1.64 {d10, d11}, [r0:128]!
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #72
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #56
-; LE-NEXT:    vmov.32 d21[1], r11
-; LE-NEXT:    vmov.32 d20[1], r10
-; LE-NEXT:    add r10, r2, #192
-; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-NEXT:    vld1.64 {d16, d17}, [r1]
-; LE-NEXT:    add r1, sp, #392
-; LE-NEXT:    vmov.32 d18[1], r5
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #72
-; LE-NEXT:    vst1.64 {d20, d21}, [r0:128]!
-; LE-NEXT:    vld1.64 {d16, d17}, [r1]
+; LE-NEXT:    add r0, sp, #280
 ; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #104
-; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; LE-NEXT:    add r0, sp, #312
+; LE-NEXT:    add lr, sp, #80
 ; LE-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEXT:    add r0, sp, #328
-; LE-NEXT:    vmov.32 d15[1], r8
+; LE-NEXT:    add r0, sp, #296
 ; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; LE-NEXT:    add lr, sp, #120
 ; LE-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEXT:    add r0, sp, #424
-; LE-NEXT:    vmov.32 d14[1], r4
-; LE-NEXT:    vst1.64 {d12, d13}, [r10:128]!
+; LE-NEXT:    add r0, sp, #328
 ; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #176
+; LE-NEXT:    add lr, sp, #56
 ; LE-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEXT:    vst1.64 {d14, d15}, [r10:128]!
 ; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #192
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d17, d17
 ; LE-NEXT:    bl llrint
-; LE-NEXT:    add lr, sp, #136
-; LE-NEXT:    mov r9, r1
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d10, d10
+; LE-NEXT:    vorr d0, d14, d14
+; LE-NEXT:    str r1, [sp, #116] @ 4-byte Spill
+; LE-NEXT:    vmov.32 d11[0], r0
 ; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d11, d11
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    vorr d0, d15, d15
+; LE-NEXT:    str r1, [sp, #76] @ 4-byte Spill
+; LE-NEXT:    vmov.32 d8[0], r0
 ; LE-NEXT:    bl llrint
+; LE-NEXT:    vorr d0, d12, d12
 ; LE-NEXT:    add lr, sp, #160
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d10, d10
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    str r1, [sp, #72] @ 4-byte Spill
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
 ; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d11, d11
+; LE-NEXT:    vorr d0, d13, d13
 ; LE-NEXT:    mov r6, r1
 ; LE-NEXT:    vmov.32 d14[0], r0
 ; LE-NEXT:    bl llrint
-; LE-NEXT:    add lr, sp, #192
+; LE-NEXT:    add lr, sp, #40
 ; LE-NEXT:    mov r4, r1
 ; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    add lr, sp, #192
-; LE-NEXT:    mov r11, r1
-; LE-NEXT:    vmov.32 d15[1], r4
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #176
 ; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d9, d9
-; LE-NEXT:    bl llrint
 ; LE-NEXT:    vorr d0, d8, d8
-; LE-NEXT:    vmov.32 d14[1], r6
-; LE-NEXT:    add lr, sp, #136
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
 ; LE-NEXT:    bl llrint
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    add lr, sp, #176
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d13[1], r5
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #24
-; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; LE-NEXT:    vorr d0, d9, d9
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d12[0], r0
 ; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d8, d8
-; LE-NEXT:    vmov.32 d12[1], r8
-; LE-NEXT:    add lr, sp, #88
+; LE-NEXT:    add lr, sp, #96
 ; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    bl llrint
-; LE-NEXT:    add lr, sp, #192
-; LE-NEXT:    str r1, [sp, #24] @ 4-byte Spill
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #40
-; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d11, d11
-; LE-NEXT:    vmov.32 d9[1], r9
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d10, d10
-; LE-NEXT:    vmov.32 d8[1], r11
-; LE-NEXT:    add lr, sp, #192
-; LE-NEXT:    mov r6, r1
 ; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
 ; LE-NEXT:    bl llrint
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    add lr, sp, #160
-; LE-NEXT:    str r1, [sp, #40] @ 4-byte Spill
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #176
-; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    add lr, sp, #40
+; LE-NEXT:    mov r10, r1
+; LE-NEXT:    vmov.32 d13[1], r5
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
 ; LE-NEXT:    add lr, sp, #56
 ; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; LE-NEXT:    vorr d0, d9, d9
-; LE-NEXT:    vmov.32 d11[1], r4
 ; LE-NEXT:    bl llrint
 ; LE-NEXT:    vorr d0, d8, d8
-; LE-NEXT:    vmov.32 d10[1], r7
-; LE-NEXT:    add lr, sp, #176
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    vmov.32 d12[1], r7
+; LE-NEXT:    add lr, sp, #96
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d11[0], r0
+; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
 ; LE-NEXT:    bl llrint
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    vmov.32 d10[0], r0
+; LE-NEXT:    add lr, sp, #24
 ; LE-NEXT:    mov r11, r1
-; LE-NEXT:    vmov.32 d15[1], r5
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #72
-; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d9, d9
+; LE-NEXT:    vmov.32 d15[1], r4
+; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d17, d17
 ; LE-NEXT:    bl llrint
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
-; LE-NEXT:    vorr d0, d8, d8
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    vmov.32 d14[1], r6
+; LE-NEXT:    mov r8, r1
+; LE-NEXT:    vmov.32 d17[0], r0
+; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; LE-NEXT:    add lr, sp, #56
-; LE-NEXT:    mov r9, r1
-; LE-NEXT:    vmov.32 d14[1], r0
 ; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-NEXT:    add lr, sp, #80
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d11, d11
 ; LE-NEXT:    bl llrint
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    add lr, sp, #72
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-NEXT:    vmov.32 d15[0], r0
 ; LE-NEXT:    add lr, sp, #160
-; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #104
+; LE-NEXT:    vorr d0, d10, d10
+; LE-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
 ; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d9, d9
-; LE-NEXT:    vmov.32 d13[1], r6
-; LE-NEXT:    bl llrint
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
-; LE-NEXT:    vorr d0, d8, d8
-; LE-NEXT:    add lr, sp, #160
 ; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vmov.32 d12[1], r0
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-NEXT:    vmov.32 d9[1], r0
 ; LE-NEXT:    bl llrint
-; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; LE-NEXT:    add lr, sp, #160
 ; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d8[1], r0
+; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
 ; LE-NEXT:    add lr, sp, #120
-; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEXT:    vorr d0, d9, d9
-; LE-NEXT:    vmov.32 d13[1], r8
+; LE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-NEXT:    vorr d0, d11, d11
 ; LE-NEXT:    bl llrint
-; LE-NEXT:    vorr d0, d8, d8
+; LE-NEXT:    vmov.32 d13[0], r0
+; LE-NEXT:    add lr, sp, #40
+; LE-NEXT:    vorr d0, d10, d10
+; LE-NEXT:    ldr r0, [sp, #116] @ 4-byte Reload
+; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    vmov.32 d12[1], r11
+; LE-NEXT:    vmov.32 d9[1], r0
 ; LE-NEXT:    bl llrint
-; LE-NEXT:    add lr, sp, #72
-; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    add lr, sp, #144
+; LE-NEXT:    mov r7, r1
+; LE-NEXT:    vmov.32 d12[0], r0
+; LE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-NEXT:    vmov.32 d8[1], r10
+; LE-NEXT:    bl llrint
+; LE-NEXT:    add lr, sp, #8
+; LE-NEXT:    vmov.32 d15[1], r6
+; LE-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #24
 ; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
 ; LE-NEXT:    add lr, sp, #160
-; LE-NEXT:    vmov.32 d17[1], r9
-; LE-NEXT:    vmov.32 d16[1], r7
-; LE-NEXT:    vst1.64 {d12, d13}, [r10:128]!
-; LE-NEXT:    vorr q9, q8, q8
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #136
-; LE-NEXT:    vmov.32 d15[1], r5
-; LE-NEXT:    vst1.64 {d16, d17}, [r10:128]
-; LE-NEXT:    vmov.32 d14[1], r1
-; LE-NEXT:    ldr r1, [sp, #156] @ 4-byte Reload
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add r0, r1, #128
+; LE-NEXT:    vmov.32 d20[0], r0
+; LE-NEXT:    vmov.32 d21[1], r8
+; LE-NEXT:    vmov.32 d20[1], r1
+; LE-NEXT:    ldr r1, [sp, #140] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d13[1], r5
+; LE-NEXT:    mov r0, r1
+; LE-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
 ; LE-NEXT:    add lr, sp, #56
-; LE-NEXT:    vmov.32 d11[1], r6
-; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-NEXT:    vmov.32 d14[1], r4
 ; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #176
-; LE-NEXT:    vmov.32 d10[1], r4
-; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #192
-; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT:    add lr, sp, #96
+; LE-NEXT:    vmov.32 d12[1], r7
+; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-NEXT:    vmov.32 d17[1], r9
+; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]
 ; LE-NEXT:    add r0, r1, #64
-; LE-NEXT:    vst1.64 {d10, d11}, [r0:128]!
 ; LE-NEXT:    vst1.64 {d14, d15}, [r0:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #88
-; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-NEXT:    vmov.32 d16[1], r11
+; LE-NEXT:    vst1.64 {d20, d21}, [r0:128]!
 ; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-NEXT:    add sp, sp, #208
+; LE-NEXT:    add sp, sp, #176
 ; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-NEXT:    add sp, sp, #4
 ; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
-; LE-NEON-LABEL: llrint_v32f64:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    .pad #4
-; LE-NEON-NEXT:    sub sp, sp, #4
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    .pad #208
-; LE-NEON-NEXT:    sub sp, sp, #208
-; LE-NEON-NEXT:    add lr, sp, #72
-; LE-NEON-NEXT:    str r0, [sp, #156] @ 4-byte Spill
-; LE-NEON-NEXT:    add r0, sp, #456
-; LE-NEON-NEXT:    vorr q4, q0, q0
-; LE-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    vorr d0, d7, d7
-; LE-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #104
-; LE-NEON-NEXT:    vorr q5, q2, q2
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEON-NEXT:    add r0, sp, #344
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #192
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEON-NEXT:    add r0, sp, #376
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEON-NEXT:    add r0, sp, #360
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #136
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEON-NEXT:    add r0, sp, #440
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d10, d10
-; LE-NEON-NEXT:    str r1, [sp, #120] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d11, d11
-; LE-NEON-NEXT:    mov r10, r1
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d8, d8
-; LE-NEON-NEXT:    add lr, sp, #88
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    mov r11, r1
-; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d9, d9
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #40
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d10, d10
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d11, d11
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #72
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d9[1], r7
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d17, d17
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d8[1], r4
-; LE-NEON-NEXT:    add lr, sp, #72
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #104
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d9, d9
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #40
-; LE-NEON-NEXT:    vorr d0, d8, d8
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d11[1], r6
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-NEON-NEXT:    vmov.32 d10[1], r9
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #88
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #120] @ 4-byte Reload
-; LE-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #24
-; LE-NEON-NEXT:    vmov.32 d19[1], r0
-; LE-NEON-NEXT:    add r0, sp, #408
-; LE-NEON-NEXT:    ldr r2, [sp, #156] @ 4-byte Reload
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEON-NEXT:    vmov.32 d13[1], r7
-; LE-NEON-NEXT:    mov r0, r2
-; LE-NEON-NEXT:    vmov.32 d12[1], r1
-; LE-NEON-NEXT:    add r1, sp, #488
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #40
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r1]
-; LE-NEON-NEXT:    add r1, sp, #472
-; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #72
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    vmov.32 d21[1], r11
-; LE-NEON-NEXT:    vmov.32 d20[1], r10
-; LE-NEON-NEXT:    add r10, r2, #192
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r1]
-; LE-NEON-NEXT:    add r1, sp, #392
-; LE-NEON-NEXT:    vmov.32 d18[1], r5
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #72
-; LE-NEON-NEXT:    vst1.64 {d20, d21}, [r0:128]!
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r1]
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #104
-; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; LE-NEON-NEXT:    add r0, sp, #312
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEON-NEXT:    add r0, sp, #328
-; LE-NEON-NEXT:    vmov.32 d15[1], r8
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #120
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEON-NEXT:    add r0, sp, #424
-; LE-NEON-NEXT:    vmov.32 d14[1], r4
-; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r10:128]!
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r10:128]!
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #192
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d17, d17
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #136
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d10, d10
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d11, d11
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d10, d10
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d11, d11
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #192
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    add lr, sp, #192
-; LE-NEON-NEXT:    mov r11, r1
-; LE-NEON-NEXT:    vmov.32 d15[1], r4
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d9, d9
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d8, d8
-; LE-NEON-NEXT:    vmov.32 d14[1], r6
-; LE-NEON-NEXT:    add lr, sp, #136
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d13[1], r5
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #24
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d9, d9
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d8, d8
-; LE-NEON-NEXT:    vmov.32 d12[1], r8
-; LE-NEON-NEXT:    add lr, sp, #88
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #192
-; LE-NEON-NEXT:    str r1, [sp, #24] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #40
-; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d11, d11
-; LE-NEON-NEXT:    vmov.32 d9[1], r9
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d10, d10
-; LE-NEON-NEXT:    vmov.32 d8[1], r11
-; LE-NEON-NEXT:    add lr, sp, #192
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    str r1, [sp, #40] @ 4-byte Spill
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d9, d9
-; LE-NEON-NEXT:    vmov.32 d11[1], r4
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d8, d8
-; LE-NEON-NEXT:    vmov.32 d10[1], r7
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    mov r11, r1
-; LE-NEON-NEXT:    vmov.32 d15[1], r5
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #72
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d9, d9
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
-; LE-NEON-NEXT:    vorr d0, d8, d8
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d14[1], r0
-; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    add lr, sp, #72
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #104
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d9, d9
-; LE-NEON-NEXT:    vmov.32 d13[1], r6
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
-; LE-NEON-NEXT:    vorr d0, d8, d8
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d12[1], r0
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #120
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    vorr d0, d9, d9
-; LE-NEON-NEXT:    vmov.32 d13[1], r8
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    vorr d0, d8, d8
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    vmov.32 d12[1], r11
-; LE-NEON-NEXT:    bl llrint
-; LE-NEON-NEXT:    add lr, sp, #72
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    vmov.32 d17[1], r9
-; LE-NEON-NEXT:    vmov.32 d16[1], r7
-; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r10:128]!
-; LE-NEON-NEXT:    vorr q9, q8, q8
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #136
-; LE-NEON-NEXT:    vmov.32 d15[1], r5
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]
-; LE-NEON-NEXT:    vmov.32 d14[1], r1
-; LE-NEON-NEXT:    ldr r1, [sp, #156] @ 4-byte Reload
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add r0, r1, #128
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    vmov.32 d11[1], r6
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    vmov.32 d10[1], r4
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #192
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-NEON-NEXT:    add r0, r1, #64
-; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #88
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-NEON-NEXT:    add sp, sp, #208
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    add sp, sp, #4
-; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-LABEL: llrint_v32f64:
+; BE-LABEL: llrint_v16f64:
 ; BE:       @ %bb.0:
 ; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -7615,675 +1859,183 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) {
 ; BE-NEXT:    sub sp, sp, #4
 ; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    .pad #232
-; BE-NEXT:    sub sp, sp, #232
-; BE-NEXT:    add lr, sp, #184
-; BE-NEXT:    str r0, [sp, #148] @ 4-byte Spill
-; BE-NEXT:    add r0, sp, #416
-; BE-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #168
-; BE-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #152
-; BE-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #128
-; BE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #200
-; BE-NEXT:    vld1.64 {d18, d19}, [r0]
-; BE-NEXT:    add r0, sp, #448
-; BE-NEXT:    vorr d0, d19, d19
-; BE-NEXT:    vld1.64 {d14, d15}, [r0]
-; BE-NEXT:    add r0, sp, #336
-; BE-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
+; BE-NEXT:    .pad #168
+; BE-NEXT:    sub sp, sp, #168
 ; BE-NEXT:    add lr, sp, #64
-; BE-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEXT:    add r0, sp, #400
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #8
-; BE-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEXT:    add r0, sp, #352
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #24
-; BE-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEXT:    add r0, sp, #368
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    str r0, [sp, #132] @ 4-byte Spill
+; BE-NEXT:    add r0, sp, #304
+; BE-NEXT:    vorr q4, q3, q3
+; BE-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
 ; BE-NEXT:    add lr, sp, #48
+; BE-NEXT:    vorr d0, d1, d1
 ; BE-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEXT:    add r0, sp, #384
+; BE-NEXT:    add r0, sp, #320
+; BE-NEXT:    vorr q6, q2, q2
 ; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    add lr, sp, #88
+; BE-NEXT:    vorr q7, q1, q1
 ; BE-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEXT:    add r0, sp, #512
+; BE-NEXT:    add r0, sp, #272
 ; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-NEXT:    add lr, sp, #112
 ; BE-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEXT:    add r0, sp, #432
-; BE-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-NEXT:    add r0, sp, #288
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    vld1.64 {d16, d17}, [r0]
 ; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d8, d8
-; BE-NEXT:    str r1, [sp, #80] @ 4-byte Spill
-; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    vorr d0, d14, d14
+; BE-NEXT:    add lr, sp, #136
+; BE-NEXT:    vmov.32 d17[0], r0
+; BE-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d9, d9
-; BE-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; BE-NEXT:    vorr d0, d15, d15
+; BE-NEXT:    str r1, [sp, #84] @ 4-byte Spill
 ; BE-NEXT:    vmov.32 d10[0], r0
 ; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d14, d14
-; BE-NEXT:    add lr, sp, #216
+; BE-NEXT:    vorr d0, d12, d12
+; BE-NEXT:    add lr, sp, #152
 ; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    mov r9, r1
+; BE-NEXT:    str r1, [sp, #44] @ 4-byte Spill
 ; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
 ; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d15, d15
-; BE-NEXT:    mov r8, r1
-; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    vorr d0, d13, d13
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vmov.32 d10[0], r0
 ; BE-NEXT:    bl llrint
-; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    vorr d0, d8, d8
 ; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d9[0], r0
-; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEXT:    vorr d0, d10, d10
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d11, d11
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    vmov.32 d11[0], r0
 ; BE-NEXT:    bl llrint
-; BE-NEXT:    add lr, sp, #200
+; BE-NEXT:    vorr d0, d9, d9
 ; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-NEXT:    bl llrint
 ; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    add lr, sp, #200
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vmov.32 d15[1], r7
-; BE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #8
-; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEXT:    vorr d0, d11, d11
 ; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d10, d10
-; BE-NEXT:    vmov.32 d14[1], r6
 ; BE-NEXT:    add lr, sp, #64
-; BE-NEXT:    mov r10, r1
+; BE-NEXT:    mov r5, r1
 ; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-NEXT:    bl llrint
-; BE-NEXT:    add lr, sp, #24
-; BE-NEXT:    mov r11, r1
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; BE-NEXT:    vorr d0, d15, d15
-; BE-NEXT:    vmov.32 d9[1], r4
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d14, d14
-; BE-NEXT:    vmov.32 d8[1], r8
-; BE-NEXT:    add lr, sp, #8
-; BE-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-NEXT:    @ kill: def $d0 killed $d0 killed $q0
 ; BE-NEXT:    bl llrint
-; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    add lr, sp, #136
+; BE-NEXT:    mov r9, r1
+; BE-NEXT:    vmov.32 d13[1], r5
+; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-NEXT:    add lr, sp, #24
-; BE-NEXT:    mov r8, r1
-; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #216
-; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #48
 ; BE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; BE-NEXT:    vorr d0, d9, d9
-; BE-NEXT:    vmov.32 d11[1], r9
 ; BE-NEXT:    bl llrint
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
 ; BE-NEXT:    vorr d0, d8, d8
-; BE-NEXT:    add lr, sp, #216
-; BE-NEXT:    mov r9, r1
-; BE-NEXT:    vmov.32 d10[1], r0
-; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-NEXT:    vmov.32 d12[1], r7
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    mov r10, r1
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    add lr, sp, #48
-; BE-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
-; BE-NEXT:    mov r6, r1
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    mov r11, r1
+; BE-NEXT:    vmov.32 d11[1], r4
 ; BE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #200
-; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #96
+; BE-NEXT:    add lr, sp, #48
+; BE-NEXT:    vorr q6, q5, q5
 ; BE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; BE-NEXT:    vorr d0, d9, d9
-; BE-NEXT:    vmov.32 d11[1], r0
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    vorr d0, d8, d8
-; BE-NEXT:    vmov.32 d10[1], r5
-; BE-NEXT:    add lr, sp, #200
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEXT:    bl llrint
-; BE-NEXT:    add lr, sp, #112
-; BE-NEXT:    vorr q4, q6, q6
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-NEXT:    vorr d0, d13, d13
-; BE-NEXT:    vmov.32 d9[1], r10
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d12, d12
-; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d12[1], r6
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    mov r8, r1
 ; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    vmov.32 d8[1], r11
+; BE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    add lr, sp, #24
-; BE-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #48
-; BE-NEXT:    vmov.32 d17[1], r0
-; BE-NEXT:    vmov.32 d16[1], r8
-; BE-NEXT:    vorr q9, q8, q8
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #112
-; BE-NEXT:    vmov.32 d17[1], r9
-; BE-NEXT:    vmov.32 d16[1], r6
-; BE-NEXT:    vorr q10, q8, q8
-; BE-NEXT:    vrev64.32 q8, q4
-; BE-NEXT:    vmov.32 d15[1], r7
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #200
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    vmov.32 d11[1], r5
-; BE-NEXT:    vrev64.32 q8, q8
-; BE-NEXT:    vmov.32 d14[1], r4
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #216
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    vmov.32 d10[1], r1
-; BE-NEXT:    vrev64.32 q8, q8
-; BE-NEXT:    vrev64.32 q6, q7
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #8
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #96
-; BE-NEXT:    vrev64.32 q7, q5
-; BE-NEXT:    vrev64.32 q8, q8
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #64
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #80
-; BE-NEXT:    vrev64.32 q8, q8
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #64
-; BE-NEXT:    vrev64.32 q8, q9
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-NEXT:    add lr, sp, #48
-; BE-NEXT:    vrev64.32 q8, q10
-; BE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEXT:    add lr, sp, #128
-; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEXT:    vorr d0, d11, d11
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d10, d10
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d9[0], r0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    ldr r6, [sp, #148] @ 4-byte Reload
+; BE-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; BE-NEXT:    mov r6, r1
+; BE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
 ; BE-NEXT:    add lr, sp, #152
-; BE-NEXT:    vmov.32 d9[1], r4
-; BE-NEXT:    mov r5, r6
-; BE-NEXT:    vmov.32 d8[1], r1
-; BE-NEXT:    vrev64.32 q8, q4
-; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEXT:    vorr d0, d11, d11
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d10, d10
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d9[0], r0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    add lr, sp, #168
-; BE-NEXT:    vmov.32 d9[1], r4
-; BE-NEXT:    vmov.32 d8[1], r1
-; BE-NEXT:    vrev64.32 q8, q4
-; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEXT:    vorr d0, d11, d11
+; BE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #88
+; BE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-NEXT:    vorr d0, d13, d13
+; BE-NEXT:    vmov.32 d9[1], r0
 ; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d10, d10
+; BE-NEXT:    vmov.32 d15[0], r0
+; BE-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; BE-NEXT:    vorr d0, d12, d12
+; BE-NEXT:    add lr, sp, #152
 ; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    vmov.32 d8[1], r0
+; BE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
 ; BE-NEXT:    bl llrint
-; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    add lr, sp, #184
-; BE-NEXT:    vmov.32 d9[1], r4
-; BE-NEXT:    vmov.32 d8[1], r1
-; BE-NEXT:    vrev64.32 q8, q4
-; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    add lr, sp, #136
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; BE-NEXT:    mov r5, r1
 ; BE-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEXT:    vorr d0, d11, d11
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d10, d10
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d9[0], r0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    add r0, sp, #464
-; BE-NEXT:    vmov.32 d9[1], r4
-; BE-NEXT:    vmov.32 d8[1], r1
-; BE-NEXT:    vrev64.32 q8, q4
-; BE-NEXT:    vld1.64 {d8, d9}, [r0]
-; BE-NEXT:    vorr d0, d9, d9
-; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d8, d8
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    add r0, sp, #480
-; BE-NEXT:    add r5, r6, #192
-; BE-NEXT:    vmov.32 d11[1], r4
-; BE-NEXT:    vmov.32 d10[1], r1
-; BE-NEXT:    vld1.64 {d8, d9}, [r0]
-; BE-NEXT:    vorr d0, d9, d9
-; BE-NEXT:    vrev64.32 q8, q5
-; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vorr d0, d8, d8
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    bl llrint
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    add r0, sp, #496
-; BE-NEXT:    vmov.32 d11[1], r4
-; BE-NEXT:    vmov.32 d10[1], r1
-; BE-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-NEXT:    add lr, sp, #112
+; BE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; BE-NEXT:    vorr d0, d9, d9
-; BE-NEXT:    vrev64.32 q8, q5
-; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-NEXT:    vmov.32 d11[1], r0
 ; BE-NEXT:    bl llrint
 ; BE-NEXT:    vorr d0, d8, d8
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d11[0], r0
+; BE-NEXT:    mov r7, r1
+; BE-NEXT:    vmov.32 d13[0], r0
+; BE-NEXT:    vmov.32 d10[1], r9
 ; BE-NEXT:    bl llrint
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    add lr, sp, #112
-; BE-NEXT:    add r0, r6, #128
-; BE-NEXT:    vmov.32 d11[1], r4
-; BE-NEXT:    vmov.32 d10[1], r1
-; BE-NEXT:    vrev64.32 q8, q5
-; BE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEXT:    vst1.64 {d14, d15}, [r5:128]
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #200
-; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #216
-; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #96
-; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #80
-; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-NEXT:    add r0, r6, #64
-; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEXT:    add lr, sp, #64
-; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    add lr, sp, #8
+; BE-NEXT:    vmov.32 d12[0], r0
 ; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
 ; BE-NEXT:    add lr, sp, #48
-; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-NEXT:    vmov.32 d17[1], r10
+; BE-NEXT:    vmov.32 d16[1], r11
+; BE-NEXT:    vorr q12, q8, q8
 ; BE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #152
+; BE-NEXT:    vmov.32 d17[1], r8
+; BE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #24
+; BE-NEXT:    vmov.32 d13[1], r7
+; BE-NEXT:    vmov.32 d16[1], r6
+; BE-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-NEXT:    add lr, sp, #64
+; BE-NEXT:    vorr q13, q8, q8
+; BE-NEXT:    vmov.32 d12[1], r1
+; BE-NEXT:    ldr r1, [sp, #132] @ 4-byte Reload
+; BE-NEXT:    vrev64.32 q8, q5
+; BE-NEXT:    mov r0, r1
+; BE-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-NEXT:    vrev64.32 q9, q9
+; BE-NEXT:    vrev64.32 q10, q10
 ; BE-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEXT:    vst1.64 {d12, d13}, [r0:128]
-; BE-NEXT:    add sp, sp, #232
+; BE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; BE-NEXT:    vrev64.32 q11, q11
+; BE-NEXT:    vmov.32 d15[1], r4
+; BE-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; BE-NEXT:    vrev64.32 q15, q6
+; BE-NEXT:    vmov.32 d14[1], r5
+; BE-NEXT:    vrev64.32 q12, q12
+; BE-NEXT:    vst1.64 {d22, d23}, [r0:128]
+; BE-NEXT:    add r0, r1, #64
+; BE-NEXT:    vrev64.32 q13, q13
+; BE-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-NEXT:    vst1.64 {d24, d25}, [r0:128]!
+; BE-NEXT:    vrev64.32 q14, q7
+; BE-NEXT:    vst1.64 {d26, d27}, [r0:128]!
+; BE-NEXT:    vst1.64 {d28, d29}, [r0:128]
+; BE-NEXT:    add sp, sp, #168
 ; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-NEXT:    add sp, sp, #4
 ; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-NEON-LABEL: llrint_v32f64:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    .pad #4
-; BE-NEON-NEXT:    sub sp, sp, #4
-; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    .pad #232
-; BE-NEON-NEXT:    sub sp, sp, #232
-; BE-NEON-NEXT:    add lr, sp, #184
-; BE-NEON-NEXT:    str r0, [sp, #148] @ 4-byte Spill
-; BE-NEON-NEXT:    add r0, sp, #416
-; BE-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #168
-; BE-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #152
-; BE-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #128
-; BE-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #200
-; BE-NEON-NEXT:    vld1.64 {d18, d19}, [r0]
-; BE-NEON-NEXT:    add r0, sp, #448
-; BE-NEON-NEXT:    vorr d0, d19, d19
-; BE-NEON-NEXT:    vld1.64 {d14, d15}, [r0]
-; BE-NEON-NEXT:    add r0, sp, #336
-; BE-NEON-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #64
-; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEON-NEXT:    add r0, sp, #400
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #8
-; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEON-NEXT:    add r0, sp, #352
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #24
-; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEON-NEXT:    add r0, sp, #368
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #48
-; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEON-NEXT:    add r0, sp, #384
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #96
-; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEON-NEXT:    add r0, sp, #512
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #112
-; BE-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-NEON-NEXT:    add r0, sp, #432
-; BE-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d8, d8
-; BE-NEON-NEXT:    str r1, [sp, #80] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d9, d9
-; BE-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d14, d14
-; BE-NEON-NEXT:    add lr, sp, #216
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    mov r9, r1
-; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d15, d15
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    add lr, sp, #64
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEON-NEXT:    vorr d0, d10, d10
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d11, d11
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    add lr, sp, #200
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    add lr, sp, #200
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d15[1], r7
-; BE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #8
-; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEON-NEXT:    vorr d0, d11, d11
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d10, d10
-; BE-NEON-NEXT:    vmov.32 d14[1], r6
-; BE-NEON-NEXT:    add lr, sp, #64
-; BE-NEON-NEXT:    mov r10, r1
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    add lr, sp, #24
-; BE-NEON-NEXT:    mov r11, r1
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; BE-NEON-NEXT:    vorr d0, d15, d15
-; BE-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d14, d14
-; BE-NEON-NEXT:    vmov.32 d8[1], r8
-; BE-NEON-NEXT:    add lr, sp, #8
-; BE-NEON-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    add lr, sp, #24
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #216
-; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #48
-; BE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-NEON-NEXT:    vorr d0, d9, d9
-; BE-NEON-NEXT:    vmov.32 d11[1], r9
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
-; BE-NEON-NEXT:    vorr d0, d8, d8
-; BE-NEON-NEXT:    add lr, sp, #216
-; BE-NEON-NEXT:    mov r9, r1
-; BE-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    add lr, sp, #48
-; BE-NEON-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #200
-; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #96
-; BE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-NEON-NEXT:    vorr d0, d9, d9
-; BE-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d8, d8
-; BE-NEON-NEXT:    vmov.32 d10[1], r5
-; BE-NEON-NEXT:    add lr, sp, #200
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    add lr, sp, #112
-; BE-NEON-NEXT:    vorr q4, q6, q6
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-NEON-NEXT:    vorr d0, d13, d13
-; BE-NEON-NEXT:    vmov.32 d9[1], r10
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d12, d12
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    vmov.32 d8[1], r11
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    add lr, sp, #24
-; BE-NEON-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #48
-; BE-NEON-NEXT:    vmov.32 d17[1], r0
-; BE-NEON-NEXT:    vmov.32 d16[1], r8
-; BE-NEON-NEXT:    vorr q9, q8, q8
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #112
-; BE-NEON-NEXT:    vmov.32 d17[1], r9
-; BE-NEON-NEXT:    vmov.32 d16[1], r6
-; BE-NEON-NEXT:    vorr q10, q8, q8
-; BE-NEON-NEXT:    vrev64.32 q8, q4
-; BE-NEON-NEXT:    vmov.32 d15[1], r7
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #200
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    vmov.32 d11[1], r5
-; BE-NEON-NEXT:    vrev64.32 q8, q8
-; BE-NEON-NEXT:    vmov.32 d14[1], r4
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #216
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-NEON-NEXT:    vrev64.32 q8, q8
-; BE-NEON-NEXT:    vrev64.32 q6, q7
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #8
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #96
-; BE-NEON-NEXT:    vrev64.32 q7, q5
-; BE-NEON-NEXT:    vrev64.32 q8, q8
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #64
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #80
-; BE-NEON-NEXT:    vrev64.32 q8, q8
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #64
-; BE-NEON-NEXT:    vrev64.32 q8, q9
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #48
-; BE-NEON-NEXT:    vrev64.32 q8, q10
-; BE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-NEON-NEXT:    add lr, sp, #128
-; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEON-NEXT:    vorr d0, d11, d11
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d10, d10
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    ldr r6, [sp, #148] @ 4-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #152
-; BE-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-NEON-NEXT:    mov r5, r6
-; BE-NEON-NEXT:    vmov.32 d8[1], r1
-; BE-NEON-NEXT:    vrev64.32 q8, q4
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEON-NEXT:    vorr d0, d11, d11
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d10, d10
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    add lr, sp, #168
-; BE-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-NEON-NEXT:    vmov.32 d8[1], r1
-; BE-NEON-NEXT:    vrev64.32 q8, q4
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEON-NEXT:    vorr d0, d11, d11
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d10, d10
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    add lr, sp, #184
-; BE-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-NEON-NEXT:    vmov.32 d8[1], r1
-; BE-NEON-NEXT:    vrev64.32 q8, q4
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-NEON-NEXT:    vorr d0, d11, d11
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d10, d10
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    add r0, sp, #464
-; BE-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-NEON-NEXT:    vmov.32 d8[1], r1
-; BE-NEON-NEXT:    vrev64.32 q8, q4
-; BE-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
-; BE-NEON-NEXT:    vorr d0, d9, d9
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d8, d8
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    add r0, sp, #480
-; BE-NEON-NEXT:    add r5, r6, #192
-; BE-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
-; BE-NEON-NEXT:    vorr d0, d9, d9
-; BE-NEON-NEXT:    vrev64.32 q8, q5
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d8, d8
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    add r0, sp, #496
-; BE-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
-; BE-NEON-NEXT:    vorr d0, d9, d9
-; BE-NEON-NEXT:    vrev64.32 q8, q5
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vorr d0, d8, d8
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    bl llrint
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    add lr, sp, #112
-; BE-NEON-NEXT:    add r0, r6, #128
-; BE-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-NEON-NEXT:    vrev64.32 q8, q5
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-NEON-NEXT:    vst1.64 {d14, d15}, [r5:128]
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #200
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #216
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #96
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #80
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-NEON-NEXT:    add r0, r6, #64
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #64
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    add lr, sp, #48
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]
-; BE-NEON-NEXT:    add sp, sp, #232
-; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    add sp, sp, #4
-; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-  %a = call <32 x i64> @llvm.llrint.v32i64.v16f64(<32 x double> %x)
-  ret <32 x i64> %a
+  %a = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> %x)
+  ret <16 x i64> %a
 }
-declare <32 x i64> @llvm.llrint.v32i64.v32f64(<32 x double>)
+declare <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double>)
 
 define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
 ; LE-LABEL: llrint_v1i64_v1f128:
@@ -8295,15 +2047,6 @@ define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
 ; LE-NEXT:    vmov.32 d0[1], r1
 ; LE-NEXT:    pop {r11, pc}
 ;
-; LE-NEON-LABEL: llrint_v1i64_v1f128:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r11, lr}
-; LE-NEON-NEXT:    push {r11, lr}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    vmov.32 d0[0], r0
-; LE-NEON-NEXT:    vmov.32 d0[1], r1
-; LE-NEON-NEXT:    pop {r11, pc}
-;
 ; BE-LABEL: llrint_v1i64_v1f128:
 ; BE:       @ %bb.0:
 ; BE-NEXT:    .save {r11, lr}
@@ -8313,16 +2056,6 @@ define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) {
 ; BE-NEXT:    vmov.32 d16[1], r1
 ; BE-NEXT:    vrev64.32 d0, d16
 ; BE-NEXT:    pop {r11, pc}
-;
-; BE-NEON-LABEL: llrint_v1i64_v1f128:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r11, lr}
-; BE-NEON-NEXT:    push {r11, lr}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-NEON-NEXT:    vrev64.32 d0, d16
-; BE-NEON-NEXT:    pop {r11, pc}
   %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x)
   ret <1 x i64> %a
 }
@@ -8356,1312 +2089,135 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) {
 ; LE-NEXT:    vpop {d8, d9}
 ; LE-NEXT:    pop {r4, r5, r6, r7, r8, pc}
 ;
-; LE-NEON-LABEL: llrint_v2i64_v2f128:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
-; LE-NEON-NEXT:    .vsave {d8, d9}
-; LE-NEON-NEXT:    vpush {d8, d9}
-; LE-NEON-NEXT:    mov r8, r3
-; LE-NEON-NEXT:    add r3, sp, #40
-; LE-NEON-NEXT:    mov r5, r2
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    mov r7, r0
-; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    mov r0, r7
-; LE-NEON-NEXT:    mov r1, r6
-; LE-NEON-NEXT:    mov r2, r5
-; LE-NEON-NEXT:    mov r3, r8
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    vmov.32 d9[1], r4
-; LE-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-NEON-NEXT:    vorr q0, q4, q4
-; LE-NEON-NEXT:    vpop {d8, d9}
-; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
-;
 ; BE-LABEL: llrint_v2i64_v2f128:
 ; BE:       @ %bb.0:
 ; BE-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; BE-NEXT:    push {r4, r5, r6, r7, r8, lr}
-; BE-NEXT:    .vsave {d8}
-; BE-NEXT:    vpush {d8}
-; BE-NEXT:    mov r8, r3
-; BE-NEXT:    add r3, sp, #32
-; BE-NEXT:    mov r5, r2
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    mov r7, r0
-; BE-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    mov r0, r7
-; BE-NEXT:    mov r1, r6
-; BE-NEXT:    mov r2, r5
-; BE-NEXT:    mov r3, r8
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    vmov.32 d8[1], r4
-; BE-NEXT:    vmov.32 d16[1], r1
-; BE-NEXT:    vrev64.32 d1, d8
-; BE-NEXT:    vrev64.32 d0, d16
-; BE-NEXT:    vpop {d8}
-; BE-NEXT:    pop {r4, r5, r6, r7, r8, pc}
-;
-; BE-NEON-LABEL: llrint_v2i64_v2f128:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
-; BE-NEON-NEXT:    .vsave {d8}
-; BE-NEON-NEXT:    vpush {d8}
-; BE-NEON-NEXT:    mov r8, r3
-; BE-NEON-NEXT:    add r3, sp, #32
-; BE-NEON-NEXT:    mov r5, r2
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    mov r7, r0
-; BE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    mov r0, r7
-; BE-NEON-NEXT:    mov r1, r6
-; BE-NEON-NEXT:    mov r2, r5
-; BE-NEON-NEXT:    mov r3, r8
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov.32 d8[1], r4
-; BE-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-NEON-NEXT:    vrev64.32 d1, d8
-; BE-NEON-NEXT:    vrev64.32 d0, d16
-; BE-NEON-NEXT:    vpop {d8}
-; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
-  %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x)
-  ret <2 x i64> %a
-}
-declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>)
-
-define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
-; LE-LABEL: llrint_v4i64_v4f128:
-; LE:       @ %bb.0:
-; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-NEXT:    .vsave {d8, d9, d10, d11}
-; LE-NEXT:    vpush {d8, d9, d10, d11}
-; LE-NEXT:    mov r5, r3
-; LE-NEXT:    add r3, sp, #96
-; LE-NEXT:    mov r7, r2
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    mov r4, r0
-; LE-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    mov r9, r1
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    mov r0, r4
-; LE-NEXT:    mov r1, r6
-; LE-NEXT:    mov r2, r7
-; LE-NEXT:    mov r3, r5
-; LE-NEXT:    ldr r8, [sp, #80]
-; LE-NEXT:    ldr r10, [sp, #64]
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #68
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    mov r0, r10
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #84
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    mov r0, r8
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    vmov.32 d11[1], r4
-; LE-NEXT:    vmov.32 d9[1], r9
-; LE-NEXT:    vmov.32 d10[1], r5
-; LE-NEXT:    vmov.32 d8[1], r1
-; LE-NEXT:    vorr q0, q5, q5
-; LE-NEXT:    vorr q1, q4, q4
-; LE-NEXT:    vpop {d8, d9, d10, d11}
-; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; LE-NEON-LABEL: llrint_v4i64_v4f128:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11}
-; LE-NEON-NEXT:    mov r5, r3
-; LE-NEON-NEXT:    add r3, sp, #96
-; LE-NEON-NEXT:    mov r7, r2
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    mov r4, r0
-; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    mov r0, r4
-; LE-NEON-NEXT:    mov r1, r6
-; LE-NEON-NEXT:    mov r2, r7
-; LE-NEON-NEXT:    mov r3, r5
-; LE-NEON-NEXT:    ldr r8, [sp, #80]
-; LE-NEON-NEXT:    ldr r10, [sp, #64]
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #68
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    mov r0, r10
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #84
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    mov r0, r8
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    vmov.32 d11[1], r4
-; LE-NEON-NEXT:    vmov.32 d9[1], r9
-; LE-NEON-NEXT:    vmov.32 d10[1], r5
-; LE-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-NEON-NEXT:    vorr q0, q5, q5
-; LE-NEON-NEXT:    vorr q1, q4, q4
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11}
-; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; BE-LABEL: llrint_v4i64_v4f128:
-; BE:       @ %bb.0:
-; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-NEXT:    .vsave {d8, d9, d10}
-; BE-NEXT:    vpush {d8, d9, d10}
-; BE-NEXT:    mov r5, r3
-; BE-NEXT:    add r3, sp, #88
-; BE-NEXT:    mov r7, r2
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    mov r4, r0
-; BE-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    mov r9, r1
-; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    mov r0, r4
-; BE-NEXT:    mov r1, r6
-; BE-NEXT:    mov r2, r7
-; BE-NEXT:    mov r3, r5
-; BE-NEXT:    ldr r8, [sp, #72]
-; BE-NEXT:    ldr r10, [sp, #56]
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #60
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vmov.32 d9[0], r0
-; BE-NEXT:    mov r0, r10
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #76
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    mov r0, r8
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    vmov.32 d10[1], r4
-; BE-NEXT:    vmov.32 d8[1], r9
-; BE-NEXT:    vmov.32 d9[1], r5
-; BE-NEXT:    vmov.32 d16[1], r1
-; BE-NEXT:    vrev64.32 d1, d10
-; BE-NEXT:    vrev64.32 d3, d8
-; BE-NEXT:    vrev64.32 d0, d9
-; BE-NEXT:    vrev64.32 d2, d16
-; BE-NEXT:    vpop {d8, d9, d10}
-; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; BE-NEON-LABEL: llrint_v4i64_v4f128:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-NEON-NEXT:    .vsave {d8, d9, d10}
-; BE-NEON-NEXT:    vpush {d8, d9, d10}
-; BE-NEON-NEXT:    mov r5, r3
-; BE-NEON-NEXT:    add r3, sp, #88
-; BE-NEON-NEXT:    mov r7, r2
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    mov r4, r0
-; BE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    mov r9, r1
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    mov r0, r4
-; BE-NEON-NEXT:    mov r1, r6
-; BE-NEON-NEXT:    mov r2, r7
-; BE-NEON-NEXT:    mov r3, r5
-; BE-NEON-NEXT:    ldr r8, [sp, #72]
-; BE-NEON-NEXT:    ldr r10, [sp, #56]
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #60
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    mov r0, r10
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #76
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    mov r0, r8
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    vmov.32 d10[1], r4
-; BE-NEON-NEXT:    vmov.32 d8[1], r9
-; BE-NEON-NEXT:    vmov.32 d9[1], r5
-; BE-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-NEON-NEXT:    vrev64.32 d1, d10
-; BE-NEON-NEXT:    vrev64.32 d3, d8
-; BE-NEON-NEXT:    vrev64.32 d0, d9
-; BE-NEON-NEXT:    vrev64.32 d2, d16
-; BE-NEON-NEXT:    vpop {d8, d9, d10}
-; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-  %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x)
-  ret <4 x i64> %a
-}
-declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>)
-
-define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
-; LE-LABEL: llrint_v8i64_v8f128:
-; LE:       @ %bb.0:
-; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEXT:    .pad #4
-; LE-NEXT:    sub sp, sp, #4
-; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    .pad #8
-; LE-NEXT:    sub sp, sp, #8
-; LE-NEXT:    mov r11, r3
-; LE-NEXT:    add r3, sp, #208
-; LE-NEXT:    mov r10, r2
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    mov r5, r0
-; LE-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r7, sp, #164
-; LE-NEXT:    ldr r6, [sp, #160]
-; LE-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    ldm r7, {r1, r2, r3, r7}
-; LE-NEXT:    mov r0, r6
-; LE-NEXT:    ldr r8, [sp, #128]
-; LE-NEXT:    ldr r9, [sp, #144]
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #180
-; LE-NEXT:    str r1, [sp] @ 4-byte Spill
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    mov r0, r7
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #132
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    mov r0, r8
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #148
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    mov r0, r9
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    mov r9, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    mov r0, r5
-; LE-NEXT:    mov r1, r4
-; LE-NEXT:    mov r2, r10
-; LE-NEXT:    mov r3, r11
-; LE-NEXT:    ldr r6, [sp, #112]
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #116
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    mov r0, r6
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #196
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    ldr r0, [sp, #192]
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; LE-NEXT:    vmov.32 d11[1], r7
-; LE-NEXT:    vmov.32 d10[1], r0
-; LE-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; LE-NEXT:    vmov.32 d15[1], r5
-; LE-NEXT:    vorr q2, q5, q5
-; LE-NEXT:    vmov.32 d13[1], r9
-; LE-NEXT:    vmov.32 d9[1], r0
-; LE-NEXT:    vmov.32 d14[1], r4
-; LE-NEXT:    vmov.32 d12[1], r8
-; LE-NEXT:    vorr q0, q7, q7
-; LE-NEXT:    vmov.32 d8[1], r1
-; LE-NEXT:    vorr q1, q6, q6
-; LE-NEXT:    vorr q3, q4, q4
-; LE-NEXT:    add sp, sp, #8
-; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    add sp, sp, #4
-; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; LE-NEON-LABEL: llrint_v8i64_v8f128:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    .pad #4
-; LE-NEON-NEXT:    sub sp, sp, #4
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    .pad #8
-; LE-NEON-NEXT:    sub sp, sp, #8
-; LE-NEON-NEXT:    mov r11, r3
-; LE-NEON-NEXT:    add r3, sp, #208
-; LE-NEON-NEXT:    mov r10, r2
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    mov r5, r0
-; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r7, sp, #164
-; LE-NEON-NEXT:    ldr r6, [sp, #160]
-; LE-NEON-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    ldm r7, {r1, r2, r3, r7}
-; LE-NEON-NEXT:    mov r0, r6
-; LE-NEON-NEXT:    ldr r8, [sp, #128]
-; LE-NEON-NEXT:    ldr r9, [sp, #144]
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #180
-; LE-NEON-NEXT:    str r1, [sp] @ 4-byte Spill
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    mov r0, r7
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #132
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    mov r0, r8
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #148
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    mov r0, r9
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    mov r0, r5
-; LE-NEON-NEXT:    mov r1, r4
-; LE-NEON-NEXT:    mov r2, r10
-; LE-NEON-NEXT:    mov r3, r11
-; LE-NEON-NEXT:    ldr r6, [sp, #112]
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #116
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    mov r0, r6
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #196
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #192]
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov.32 d11[1], r7
-; LE-NEON-NEXT:    vmov.32 d10[1], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov.32 d15[1], r5
-; LE-NEON-NEXT:    vorr q2, q5, q5
-; LE-NEON-NEXT:    vmov.32 d13[1], r9
-; LE-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-NEON-NEXT:    vmov.32 d14[1], r4
-; LE-NEON-NEXT:    vmov.32 d12[1], r8
-; LE-NEON-NEXT:    vorr q0, q7, q7
-; LE-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-NEON-NEXT:    vorr q1, q6, q6
-; LE-NEON-NEXT:    vorr q3, q4, q4
-; LE-NEON-NEXT:    add sp, sp, #8
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    add sp, sp, #4
-; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-LABEL: llrint_v8i64_v8f128:
-; BE:       @ %bb.0:
-; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEXT:    .pad #4
-; BE-NEXT:    sub sp, sp, #4
-; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
-; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
-; BE-NEXT:    .pad #16
-; BE-NEXT:    sub sp, sp, #16
-; BE-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; BE-NEXT:    add r3, sp, #208
-; BE-NEXT:    mov r11, r2
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    mov r5, r0
-; BE-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    ldr r7, [sp, #176]
-; BE-NEXT:    add r3, sp, #180
-; BE-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    mov r0, r7
-; BE-NEXT:    ldr r6, [sp, #128]
-; BE-NEXT:    ldr r8, [sp, #144]
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #132
-; BE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; BE-NEXT:    vmov.32 d9[0], r0
-; BE-NEXT:    mov r0, r6
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #148
+; BE-NEXT:    push {r4, r5, r6, r7, r8, lr}
+; BE-NEXT:    .vsave {d8}
+; BE-NEXT:    vpush {d8}
+; BE-NEXT:    mov r8, r3
+; BE-NEXT:    add r3, sp, #32
+; BE-NEXT:    mov r5, r2
 ; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    mov r0, r8
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #160
-; BE-NEXT:    mov r9, r0
-; BE-NEXT:    mov r7, r1
+; BE-NEXT:    mov r7, r0
 ; BE-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; BE-NEXT:    bl llrintl
-; BE-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
-; BE-NEXT:    mov r8, r1
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    mov r0, r5
-; BE-NEXT:    mov r1, r4
-; BE-NEXT:    mov r2, r11
-; BE-NEXT:    ldr r10, [sp, #112]
-; BE-NEXT:    vmov.32 d12[0], r9
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #116
 ; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    mov r0, r10
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #196
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    ldr r0, [sp, #192]
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    vmov.32 d8[0], r0
+; BE-NEXT:    mov r0, r7
+; BE-NEXT:    mov r1, r6
+; BE-NEXT:    mov r2, r5
+; BE-NEXT:    mov r3, r8
 ; BE-NEXT:    bl llrintl
 ; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; BE-NEXT:    vmov.32 d14[1], r5
-; BE-NEXT:    vmov.32 d9[1], r0
-; BE-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; BE-NEXT:    vmov.32 d12[1], r7
-; BE-NEXT:    vmov.32 d8[1], r0
-; BE-NEXT:    vmov.32 d13[1], r4
-; BE-NEXT:    vmov.32 d10[1], r6
-; BE-NEXT:    vmov.32 d11[1], r8
+; BE-NEXT:    vmov.32 d8[1], r4
 ; BE-NEXT:    vmov.32 d16[1], r1
-; BE-NEXT:    vrev64.32 d1, d14
-; BE-NEXT:    vrev64.32 d3, d12
-; BE-NEXT:    vrev64.32 d5, d9
-; BE-NEXT:    vrev64.32 d7, d8
-; BE-NEXT:    vrev64.32 d0, d13
-; BE-NEXT:    vrev64.32 d2, d10
-; BE-NEXT:    vrev64.32 d4, d11
-; BE-NEXT:    vrev64.32 d6, d16
-; BE-NEXT:    add sp, sp, #16
-; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
-; BE-NEXT:    add sp, sp, #4
-; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-NEON-LABEL: llrint_v8i64_v8f128:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    .pad #4
-; BE-NEON-NEXT:    sub sp, sp, #4
-; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
-; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
-; BE-NEON-NEXT:    .pad #16
-; BE-NEON-NEXT:    sub sp, sp, #16
-; BE-NEON-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; BE-NEON-NEXT:    add r3, sp, #208
-; BE-NEON-NEXT:    mov r11, r2
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    mov r5, r0
-; BE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    ldr r7, [sp, #176]
-; BE-NEON-NEXT:    add r3, sp, #180
-; BE-NEON-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    mov r0, r7
-; BE-NEON-NEXT:    ldr r6, [sp, #128]
-; BE-NEON-NEXT:    ldr r8, [sp, #144]
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #132
-; BE-NEON-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    mov r0, r6
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #148
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    mov r0, r8
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #160
-; BE-NEON-NEXT:    mov r9, r0
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    mov r0, r5
-; BE-NEON-NEXT:    mov r1, r4
-; BE-NEON-NEXT:    mov r2, r11
-; BE-NEON-NEXT:    ldr r10, [sp, #112]
-; BE-NEON-NEXT:    vmov.32 d12[0], r9
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #116
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    mov r0, r10
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #196
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #192]
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov.32 d14[1], r5
-; BE-NEON-NEXT:    vmov.32 d9[1], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov.32 d12[1], r7
-; BE-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-NEON-NEXT:    vmov.32 d13[1], r4
-; BE-NEON-NEXT:    vmov.32 d10[1], r6
-; BE-NEON-NEXT:    vmov.32 d11[1], r8
-; BE-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-NEON-NEXT:    vrev64.32 d1, d14
-; BE-NEON-NEXT:    vrev64.32 d3, d12
-; BE-NEON-NEXT:    vrev64.32 d5, d9
-; BE-NEON-NEXT:    vrev64.32 d7, d8
-; BE-NEON-NEXT:    vrev64.32 d0, d13
-; BE-NEON-NEXT:    vrev64.32 d2, d10
-; BE-NEON-NEXT:    vrev64.32 d4, d11
-; BE-NEON-NEXT:    vrev64.32 d6, d16
-; BE-NEON-NEXT:    add sp, sp, #16
-; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
-; BE-NEON-NEXT:    add sp, sp, #4
-; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-  %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x)
-  ret <8 x i64> %a
+; BE-NEXT:    vrev64.32 d1, d8
+; BE-NEXT:    vrev64.32 d0, d16
+; BE-NEXT:    vpop {d8}
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, pc}
+  %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x)
+  ret <2 x i64> %a
 }
-declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>)
+declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>)
 
-define <16 x i64> @llrint_v16f128(<16 x fp128> %x) {
-; LE-LABEL: llrint_v16f128:
+define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) {
+; LE-LABEL: llrint_v4i64_v4f128:
 ; LE:       @ %bb.0:
-; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEXT:    .pad #4
-; LE-NEXT:    sub sp, sp, #4
-; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    .pad #72
-; LE-NEXT:    sub sp, sp, #72
-; LE-NEXT:    mov r6, r3
-; LE-NEXT:    add r3, sp, #408
+; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-NEXT:    vpush {d8, d9, d10, d11}
+; LE-NEXT:    mov r5, r3
+; LE-NEXT:    add r3, sp, #96
 ; LE-NEXT:    mov r7, r2
+; LE-NEXT:    mov r6, r1
 ; LE-NEXT:    mov r4, r0
 ; LE-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r5, sp, #176
-; LE-NEXT:    mov r10, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    mov r0, r7
-; LE-NEXT:    ldm r5, {r2, r3, r5}
-; LE-NEXT:    mov r1, r6
-; LE-NEXT:    ldr r8, [sp, #232]
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #188
 ; LE-NEXT:    mov r9, r1
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    mov r0, r5
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #236
-; LE-NEXT:    mov r11, r1
 ; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    mov r0, r8
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #252
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    ldr r0, [sp, #248]
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #268
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    ldr r0, [sp, #264]
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #284
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    ldr r0, [sp, #280]
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    ldm r3, {r1, r2, r3}
+; LE-NEXT:    mov r0, r4
+; LE-NEXT:    mov r1, r6
+; LE-NEXT:    mov r2, r7
+; LE-NEXT:    mov r3, r5
+; LE-NEXT:    ldr r8, [sp, #80]
+; LE-NEXT:    ldr r10, [sp, #64]
 ; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #316
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    ldr r0, [sp, #312]
+; LE-NEXT:    add r3, sp, #68
 ; LE-NEXT:    mov r5, r1
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    vmov.32 d15[1], r5
-; LE-NEXT:    add lr, sp, #56
-; LE-NEXT:    ldr r5, [sp, #300]
-; LE-NEXT:    vmov.32 d14[1], r7
-; LE-NEXT:    ldr r2, [sp, #304]
-; LE-NEXT:    ldr r3, [sp, #308]
-; LE-NEXT:    vmov.32 d11[1], r6
-; LE-NEXT:    ldr r6, [sp, #200]
-; LE-NEXT:    ldr r7, [sp, #204]
-; LE-NEXT:    vmov.32 d10[1], r8
-; LE-NEXT:    ldr r8, [sp, #344]
-; LE-NEXT:    vmov.32 d9[1], r11
-; LE-NEXT:    ldr r11, [sp, #216]
-; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #40
-; LE-NEXT:    vmov.32 d17[0], r0
-; LE-NEXT:    ldr r0, [sp, #296]
-; LE-NEXT:    vmov.32 d8[1], r9
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #24
-; LE-NEXT:    vorr q5, q8, q8
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEXT:    vorr q4, q6, q6
-; LE-NEXT:    vmov.32 d11[1], r1
-; LE-NEXT:    mov r1, r5
-; LE-NEXT:    vmov.32 d9[1], r10
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    ldr r2, [sp, #208]
-; LE-NEXT:    ldr r3, [sp, #212]
-; LE-NEXT:    add lr, sp, #8
-; LE-NEXT:    mov r9, r1
-; LE-NEXT:    mov r0, r6
-; LE-NEXT:    mov r1, r7
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #220
-; LE-NEXT:    mov r10, r1
 ; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    mov r0, r11
+; LE-NEXT:    mov r0, r10
 ; LE-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #348
-; LE-NEXT:    mov r11, r1
+; LE-NEXT:    add r3, sp, #84
+; LE-NEXT:    mov r4, r1
 ; LE-NEXT:    vmov.32 d11[0], r0
 ; LE-NEXT:    mov r0, r8
 ; LE-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #364
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    ldr r0, [sp, #360]
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #380
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    ldr r0, [sp, #376]
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #396
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    ldr r0, [sp, #392]
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #332
 ; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    ldr r0, [sp, #328]
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add lr, sp, #8
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    add r0, r4, #64
-; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #24
-; LE-NEXT:    vmov.32 d13[1], r8
-; LE-NEXT:    vmov.32 d18[1], r9
-; LE-NEXT:    vmov.32 d15[1], r6
-; LE-NEXT:    vmov.32 d12[1], r1
-; LE-NEXT:    vmov.32 d14[1], r5
-; LE-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-NEXT:    vmov.32 d8[1], r7
-; LE-NEXT:    vst1.64 {d14, d15}, [r0:128]!
-; LE-NEXT:    vst1.64 {d8, d9}, [r0:128]
-; LE-NEXT:    vmov.32 d11[1], r11
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #40
-; LE-NEXT:    vmov.32 d10[1], r10
-; LE-NEXT:    vst1.64 {d16, d17}, [r4:128]!
-; LE-NEXT:    vst1.64 {d10, d11}, [r4:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #56
-; LE-NEXT:    vst1.64 {d16, d17}, [r4:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vst1.64 {d16, d17}, [r4:128]
-; LE-NEXT:    add sp, sp, #72
-; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    add sp, sp, #4
-; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; LE-NEON-LABEL: llrint_v16f128:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    .pad #4
-; LE-NEON-NEXT:    sub sp, sp, #4
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    .pad #72
-; LE-NEON-NEXT:    sub sp, sp, #72
-; LE-NEON-NEXT:    mov r6, r3
-; LE-NEON-NEXT:    add r3, sp, #408
-; LE-NEON-NEXT:    mov r7, r2
-; LE-NEON-NEXT:    mov r4, r0
-; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r5, sp, #176
-; LE-NEON-NEXT:    mov r10, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    mov r0, r7
-; LE-NEON-NEXT:    ldm r5, {r2, r3, r5}
-; LE-NEON-NEXT:    mov r1, r6
-; LE-NEON-NEXT:    ldr r8, [sp, #232]
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #188
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    mov r0, r5
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #236
-; LE-NEON-NEXT:    mov r11, r1
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    mov r0, r8
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #252
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #248]
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #268
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #264]
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #284
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #280]
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #316
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #312]
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    vmov.32 d15[1], r5
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    ldr r5, [sp, #300]
-; LE-NEON-NEXT:    vmov.32 d14[1], r7
-; LE-NEON-NEXT:    ldr r2, [sp, #304]
-; LE-NEON-NEXT:    ldr r3, [sp, #308]
-; LE-NEON-NEXT:    vmov.32 d11[1], r6
-; LE-NEON-NEXT:    ldr r6, [sp, #200]
-; LE-NEON-NEXT:    ldr r7, [sp, #204]
-; LE-NEON-NEXT:    vmov.32 d10[1], r8
-; LE-NEON-NEXT:    ldr r8, [sp, #344]
-; LE-NEON-NEXT:    vmov.32 d9[1], r11
-; LE-NEON-NEXT:    ldr r11, [sp, #216]
-; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #40
-; LE-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #296]
-; LE-NEON-NEXT:    vmov.32 d8[1], r9
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #24
-; LE-NEON-NEXT:    vorr q5, q8, q8
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    vorr q4, q6, q6
-; LE-NEON-NEXT:    vmov.32 d11[1], r1
-; LE-NEON-NEXT:    mov r1, r5
-; LE-NEON-NEXT:    vmov.32 d9[1], r10
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    ldr r2, [sp, #208]
-; LE-NEON-NEXT:    ldr r3, [sp, #212]
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    mov r9, r1
-; LE-NEON-NEXT:    mov r0, r6
-; LE-NEON-NEXT:    mov r1, r7
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #220
-; LE-NEON-NEXT:    mov r10, r1
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    mov r0, r11
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #348
-; LE-NEON-NEXT:    mov r11, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    mov r0, r8
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #364
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #360]
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #380
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #376]
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #396
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #392]
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #332
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #328]
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    add r0, r4, #64
-; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #24
-; LE-NEON-NEXT:    vmov.32 d13[1], r8
-; LE-NEON-NEXT:    vmov.32 d18[1], r9
-; LE-NEON-NEXT:    vmov.32 d15[1], r6
-; LE-NEON-NEXT:    vmov.32 d12[1], r1
-; LE-NEON-NEXT:    vmov.32 d14[1], r5
-; LE-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-NEON-NEXT:    vmov.32 d8[1], r7
-; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]
-; LE-NEON-NEXT:    vmov.32 d11[1], r11
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #40
-; LE-NEON-NEXT:    vmov.32 d10[1], r10
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
-; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r4:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #56
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]
-; LE-NEON-NEXT:    add sp, sp, #72
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    add sp, sp, #4
-; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; LE-NEXT:    vmov.32 d11[1], r4
+; LE-NEXT:    vmov.32 d9[1], r9
+; LE-NEXT:    vmov.32 d10[1], r5
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vorr q0, q5, q5
+; LE-NEXT:    vorr q1, q4, q4
+; LE-NEXT:    vpop {d8, d9, d10, d11}
+; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 ;
-; BE-LABEL: llrint_v16f128:
+; BE-LABEL: llrint_v4i64_v4f128:
 ; BE:       @ %bb.0:
-; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEXT:    .pad #4
-; BE-NEXT:    sub sp, sp, #4
-; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    .pad #56
-; BE-NEXT:    sub sp, sp, #56
+; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-NEXT:    .vsave {d8, d9, d10}
+; BE-NEXT:    vpush {d8, d9, d10}
 ; BE-NEXT:    mov r5, r3
-; BE-NEXT:    add r3, sp, #376
-; BE-NEXT:    mov r6, r2
+; BE-NEXT:    add r3, sp, #88
+; BE-NEXT:    mov r7, r2
+; BE-NEXT:    mov r6, r1
 ; BE-NEXT:    mov r4, r0
 ; BE-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; BE-NEXT:    bl llrintl
-; BE-NEXT:    ldr r7, [sp, #392]
-; BE-NEXT:    add r3, sp, #396
 ; BE-NEXT:    mov r9, r1
 ; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    mov r0, r7
-; BE-NEXT:    ldr r11, [sp, #168]
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    ldr r2, [sp, #160]
-; BE-NEXT:    mov r10, r1
-; BE-NEXT:    ldr r3, [sp, #164]
-; BE-NEXT:    vmov.32 d9[0], r0
-; BE-NEXT:    mov r0, r6
-; BE-NEXT:    mov r1, r5
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #172
-; BE-NEXT:    mov r8, r1
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    mov r0, r11
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #220
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    ldr r0, [sp, #216]
-; BE-NEXT:    mov r11, r1
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #236
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    ldr r0, [sp, #232]
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #252
-; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    ldr r0, [sp, #248]
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #268
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    ldr r0, [sp, #264]
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    ldr r0, [sp, #280]
-; BE-NEXT:    ldr r2, [sp, #288]
-; BE-NEXT:    vmov.32 d13[1], r7
-; BE-NEXT:    ldr r7, [sp, #284]
-; BE-NEXT:    ldr r3, [sp, #292]
-; BE-NEXT:    vmov.32 d14[1], r5
-; BE-NEXT:    ldr r5, [sp, #328]
-; BE-NEXT:    vmov.32 d12[1], r6
-; BE-NEXT:    ldr r6, [sp, #300]
-; BE-NEXT:    vmov.32 d10[1], r8
-; BE-NEXT:    ldr r8, [sp, #184]
-; BE-NEXT:    vmov.32 d11[1], r11
-; BE-NEXT:    vmov.32 d9[1], r10
-; BE-NEXT:    vmov.32 d8[1], r9
-; BE-NEXT:    vmov.32 d15[1], r1
-; BE-NEXT:    mov r1, r7
-; BE-NEXT:    vstr d14, [sp, #48] @ 8-byte Spill
-; BE-NEXT:    vstr d13, [sp, #40] @ 8-byte Spill
-; BE-NEXT:    vstr d12, [sp, #32] @ 8-byte Spill
-; BE-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
-; BE-NEXT:    vstr d10, [sp, #16] @ 8-byte Spill
-; BE-NEXT:    vstr d9, [sp, #8] @ 8-byte Spill
-; BE-NEXT:    vstr d8, [sp] @ 8-byte Spill
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    mov r10, r1
-; BE-NEXT:    ldr r1, [sp, #296]
-; BE-NEXT:    ldr r2, [sp, #304]
-; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    ldr r3, [sp, #308]
-; BE-NEXT:    mov r0, r1
+; BE-NEXT:    mov r0, r4
 ; BE-NEXT:    mov r1, r6
+; BE-NEXT:    mov r2, r7
+; BE-NEXT:    mov r3, r5
+; BE-NEXT:    ldr r8, [sp, #72]
+; BE-NEXT:    ldr r10, [sp, #56]
 ; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #332
-; BE-NEXT:    mov r11, r1
-; BE-NEXT:    vmov.32 d9[0], r0
-; BE-NEXT:    mov r0, r5
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #188
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    mov r0, r8
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #204
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    ldr r0, [sp, #200]
-; BE-NEXT:    mov r8, r1
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #348
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    ldr r0, [sp, #344]
+; BE-NEXT:    add r3, sp, #60
 ; BE-NEXT:    mov r5, r1
+; BE-NEXT:    vmov.32 d9[0], r0
+; BE-NEXT:    mov r0, r10
 ; BE-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #364
-; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    ldr r0, [sp, #360]
-; BE-NEXT:    mov r9, r1
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #316
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    ldr r0, [sp, #312]
-; BE-NEXT:    mov r6, r1
+; BE-NEXT:    add r3, sp, #76
+; BE-NEXT:    mov r4, r1
+; BE-NEXT:    vmov.32 d10[0], r0
+; BE-NEXT:    mov r0, r8
 ; BE-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-NEXT:    bl llrintl
-; BE-NEXT:    vldr d18, [sp, #48] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d17, d15
-; BE-NEXT:    vrev64.32 d16, d18
-; BE-NEXT:    vldr d18, [sp, #40] @ 8-byte Reload
-; BE-NEXT:    vmov.32 d24[0], r0
-; BE-NEXT:    add r0, r4, #64
-; BE-NEXT:    vldr d20, [sp, #32] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d19, d18
-; BE-NEXT:    vmov.32 d9[1], r11
-; BE-NEXT:    vmov.32 d10[1], r7
-; BE-NEXT:    vrev64.32 d18, d20
-; BE-NEXT:    vldr d20, [sp, #24] @ 8-byte Reload
-; BE-NEXT:    vmov.32 d8[1], r10
-; BE-NEXT:    vmov.32 d14[1], r6
-; BE-NEXT:    vmov.32 d24[1], r1
-; BE-NEXT:    vldr d22, [sp, #16] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d21, d20
-; BE-NEXT:    vrev64.32 d1, d9
-; BE-NEXT:    vmov.32 d13[1], r9
-; BE-NEXT:    vrev64.32 d31, d10
-; BE-NEXT:    vrev64.32 d20, d22
-; BE-NEXT:    vldr d22, [sp, #8] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d0, d8
-; BE-NEXT:    vrev64.32 d29, d14
-; BE-NEXT:    vmov.32 d12[1], r5
-; BE-NEXT:    vrev64.32 d30, d24
-; BE-NEXT:    vrev64.32 d27, d22
-; BE-NEXT:    vldr d22, [sp] @ 8-byte Reload
-; BE-NEXT:    vst1.64 {d0, d1}, [r0:128]!
-; BE-NEXT:    vmov.32 d11[1], r8
-; BE-NEXT:    vrev64.32 d28, d13
-; BE-NEXT:    vst1.64 {d30, d31}, [r0:128]!
-; BE-NEXT:    vrev64.32 d26, d22
-; BE-NEXT:    vrev64.32 d23, d12
-; BE-NEXT:    vst1.64 {d28, d29}, [r0:128]!
-; BE-NEXT:    vrev64.32 d22, d11
-; BE-NEXT:    vst1.64 {d26, d27}, [r0:128]
-; BE-NEXT:    vst1.64 {d20, d21}, [r4:128]!
-; BE-NEXT:    vst1.64 {d22, d23}, [r4:128]!
-; BE-NEXT:    vst1.64 {d18, d19}, [r4:128]!
-; BE-NEXT:    vst1.64 {d16, d17}, [r4:128]
-; BE-NEXT:    add sp, sp, #56
-; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    add sp, sp, #4
-; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-NEON-LABEL: llrint_v16f128:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    .pad #4
-; BE-NEON-NEXT:    sub sp, sp, #4
-; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    .pad #56
-; BE-NEON-NEXT:    sub sp, sp, #56
-; BE-NEON-NEXT:    mov r5, r3
-; BE-NEON-NEXT:    add r3, sp, #376
-; BE-NEON-NEXT:    mov r6, r2
-; BE-NEON-NEXT:    mov r4, r0
-; BE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    ldr r7, [sp, #392]
-; BE-NEON-NEXT:    add r3, sp, #396
-; BE-NEON-NEXT:    mov r9, r1
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    mov r0, r7
-; BE-NEON-NEXT:    ldr r11, [sp, #168]
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    ldr r2, [sp, #160]
-; BE-NEON-NEXT:    mov r10, r1
-; BE-NEON-NEXT:    ldr r3, [sp, #164]
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    mov r0, r6
-; BE-NEON-NEXT:    mov r1, r5
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #172
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    mov r0, r11
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #220
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #216]
-; BE-NEON-NEXT:    mov r11, r1
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #236
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #232]
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #252
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #248]
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #268
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #264]
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #280]
-; BE-NEON-NEXT:    ldr r2, [sp, #288]
-; BE-NEON-NEXT:    vmov.32 d13[1], r7
-; BE-NEON-NEXT:    ldr r7, [sp, #284]
-; BE-NEON-NEXT:    ldr r3, [sp, #292]
-; BE-NEON-NEXT:    vmov.32 d14[1], r5
-; BE-NEON-NEXT:    ldr r5, [sp, #328]
-; BE-NEON-NEXT:    vmov.32 d12[1], r6
-; BE-NEON-NEXT:    ldr r6, [sp, #300]
-; BE-NEON-NEXT:    vmov.32 d10[1], r8
-; BE-NEON-NEXT:    ldr r8, [sp, #184]
-; BE-NEON-NEXT:    vmov.32 d11[1], r11
-; BE-NEON-NEXT:    vmov.32 d9[1], r10
-; BE-NEON-NEXT:    vmov.32 d8[1], r9
-; BE-NEON-NEXT:    vmov.32 d15[1], r1
-; BE-NEON-NEXT:    mov r1, r7
-; BE-NEON-NEXT:    vstr d14, [sp, #48] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d13, [sp, #40] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d12, [sp, #32] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d10, [sp, #16] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d9, [sp, #8] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d8, [sp] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    mov r10, r1
-; BE-NEON-NEXT:    ldr r1, [sp, #296]
-; BE-NEON-NEXT:    ldr r2, [sp, #304]
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    ldr r3, [sp, #308]
-; BE-NEON-NEXT:    mov r0, r1
-; BE-NEON-NEXT:    mov r1, r6
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #332
-; BE-NEON-NEXT:    mov r11, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    mov r0, r5
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #188
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    mov r0, r8
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #204
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #200]
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #348
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #344]
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #364
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #360]
-; BE-NEON-NEXT:    mov r9, r1
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #316
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #312]
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    vldr d18, [sp, #48] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d17, d15
-; BE-NEON-NEXT:    vrev64.32 d16, d18
-; BE-NEON-NEXT:    vldr d18, [sp, #40] @ 8-byte Reload
-; BE-NEON-NEXT:    vmov.32 d24[0], r0
-; BE-NEON-NEXT:    add r0, r4, #64
-; BE-NEON-NEXT:    vldr d20, [sp, #32] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d19, d18
-; BE-NEON-NEXT:    vmov.32 d9[1], r11
-; BE-NEON-NEXT:    vmov.32 d10[1], r7
-; BE-NEON-NEXT:    vrev64.32 d18, d20
-; BE-NEON-NEXT:    vldr d20, [sp, #24] @ 8-byte Reload
-; BE-NEON-NEXT:    vmov.32 d8[1], r10
-; BE-NEON-NEXT:    vmov.32 d14[1], r6
-; BE-NEON-NEXT:    vmov.32 d24[1], r1
-; BE-NEON-NEXT:    vldr d22, [sp, #16] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d21, d20
-; BE-NEON-NEXT:    vrev64.32 d1, d9
-; BE-NEON-NEXT:    vmov.32 d13[1], r9
-; BE-NEON-NEXT:    vrev64.32 d31, d10
-; BE-NEON-NEXT:    vrev64.32 d20, d22
-; BE-NEON-NEXT:    vldr d22, [sp, #8] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d0, d8
-; BE-NEON-NEXT:    vrev64.32 d29, d14
-; BE-NEON-NEXT:    vmov.32 d12[1], r5
-; BE-NEON-NEXT:    vrev64.32 d30, d24
-; BE-NEON-NEXT:    vrev64.32 d27, d22
-; BE-NEON-NEXT:    vldr d22, [sp] @ 8-byte Reload
-; BE-NEON-NEXT:    vst1.64 {d0, d1}, [r0:128]!
-; BE-NEON-NEXT:    vmov.32 d11[1], r8
-; BE-NEON-NEXT:    vrev64.32 d28, d13
-; BE-NEON-NEXT:    vst1.64 {d30, d31}, [r0:128]!
-; BE-NEON-NEXT:    vrev64.32 d26, d22
-; BE-NEON-NEXT:    vrev64.32 d23, d12
-; BE-NEON-NEXT:    vst1.64 {d28, d29}, [r0:128]!
-; BE-NEON-NEXT:    vrev64.32 d22, d11
-; BE-NEON-NEXT:    vst1.64 {d26, d27}, [r0:128]
-; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r4:128]!
-; BE-NEON-NEXT:    vst1.64 {d22, d23}, [r4:128]!
-; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r4:128]!
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]
-; BE-NEON-NEXT:    add sp, sp, #56
-; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    add sp, sp, #4
-; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-  %a = call <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128> %x)
-  ret <16 x i64> %a
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    vmov.32 d10[1], r4
+; BE-NEXT:    vmov.32 d8[1], r9
+; BE-NEXT:    vmov.32 d9[1], r5
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 d1, d10
+; BE-NEXT:    vrev64.32 d3, d8
+; BE-NEXT:    vrev64.32 d0, d9
+; BE-NEXT:    vrev64.32 d2, d16
+; BE-NEXT:    vpop {d8, d9, d10}
+; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+  %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x)
+  ret <4 x i64> %a
 }
-declare <16 x i64> @llvm.llrint.v16i64.v16f128(<16 x fp128>)
+declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>)
 
-define <32 x i64> @llrint_v32f128(<32 x fp128> %x) {
-; LE-LABEL: llrint_v32f128:
+define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) {
+; LE-LABEL: llrint_v8i64_v8f128:
 ; LE:       @ %bb.0:
 ; LE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; LE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -9669,1458 +2225,170 @@ define <32 x i64> @llrint_v32f128(<32 x fp128> %x) {
 ; LE-NEXT:    sub sp, sp, #4
 ; LE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEXT:    .pad #192
-; LE-NEXT:    sub sp, sp, #192
-; LE-NEXT:    str r3, [sp, #60] @ 4-byte Spill
-; LE-NEXT:    add r3, sp, #688
-; LE-NEXT:    str r2, [sp, #56] @ 4-byte Spill
-; LE-NEXT:    mov r9, r0
-; LE-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #560
-; LE-NEXT:    mov r4, r0
-; LE-NEXT:    str r1, [sp, #64] @ 4-byte Spill
-; LE-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    vmov.32 d17[0], r0
-; LE-NEXT:    ldr r7, [sp, #544]
-; LE-NEXT:    ldr r6, [sp, #548]
-; LE-NEXT:    add lr, sp, #96
-; LE-NEXT:    ldr r2, [sp, #552]
-; LE-NEXT:    vmov.32 d17[1], r1
-; LE-NEXT:    ldr r3, [sp, #556]
-; LE-NEXT:    mov r0, r7
-; LE-NEXT:    mov r1, r6
-; LE-NEXT:    vorr q4, q8, q8
-; LE-NEXT:    ldr r5, [sp, #528]
-; LE-NEXT:    vmov.32 d17[0], r4
-; LE-NEXT:    ldr r10, [sp, #304]
-; LE-NEXT:    ldr r8, [sp, #368]
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #532
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    mov r11, r1
-; LE-NEXT:    add lr, sp, #144
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    mov r0, r5
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #308
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    add lr, sp, #176
-; LE-NEXT:    vmov.32 d17[0], r0
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    mov r0, r10
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #372
-; LE-NEXT:    mov r10, r1
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    mov r0, r8
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #404
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    ldr r0, [sp, #400]
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #596
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    ldr r0, [sp, #592]
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #676
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    ldr r0, [sp, #672]
+; LE-NEXT:    .pad #8
+; LE-NEXT:    sub sp, sp, #8
+; LE-NEXT:    mov r11, r3
+; LE-NEXT:    add r3, sp, #208
+; LE-NEXT:    mov r10, r2
 ; LE-NEXT:    mov r4, r1
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add lr, sp, #96
-; LE-NEXT:    vmov.32 d13[1], r4
-; LE-NEXT:    str r1, [sp, #52] @ 4-byte Spill
-; LE-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #80
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #128
-; LE-NEXT:    vmov.32 d9[1], r7
-; LE-NEXT:    ldr r1, [sp, #628]
-; LE-NEXT:    ldr r2, [sp, #632]
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #112
-; LE-NEXT:    vmov.32 d15[1], r6
-; LE-NEXT:    ldr r3, [sp, #636]
-; LE-NEXT:    ldr r7, [sp, #64] @ 4-byte Reload
-; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #160
-; LE-NEXT:    vmov.32 d11[1], r10
-; LE-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #176
-; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #144
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d18[0], r0
-; LE-NEXT:    ldr r0, [sp, #624]
-; LE-NEXT:    vmov.32 d16[1], r11
-; LE-NEXT:    vmov.32 d9[1], r5
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #96
-; LE-NEXT:    vmov.32 d19[1], r7
-; LE-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #512
-; LE-NEXT:    str r0, [sp, #48] @ 4-byte Spill
-; LE-NEXT:    str r1, [sp, #64] @ 4-byte Spill
-; LE-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #768
-; LE-NEXT:    mov r11, r0
-; LE-NEXT:    str r1, [sp, #28] @ 4-byte Spill
+; LE-NEXT:    mov r5, r0
 ; LE-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; LE-NEXT:    bl llrintl
-; LE-NEXT:    ldr r6, [sp, #784]
-; LE-NEXT:    add r3, sp, #788
-; LE-NEXT:    mov r8, r1
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    mov r0, r6
-; LE-NEXT:    ldr r5, [sp, #736]
-; LE-NEXT:    ldr r7, [sp, #752]
-; LE-NEXT:    ldr r4, [sp, #720]
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #740
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    mov r0, r5
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #756
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    mov r0, r7
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #724
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    mov r0, r4
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    vmov.32 d13[1], r7
-; LE-NEXT:    add lr, sp, #8
-; LE-NEXT:    ldr r2, [sp, #296]
-; LE-NEXT:    vmov.32 d12[1], r5
-; LE-NEXT:    ldr r3, [sp, #300]
-; LE-NEXT:    ldr r4, [sp, #576]
-; LE-NEXT:    vmov.32 d11[0], r0
-; LE-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
-; LE-NEXT:    ldr r10, [sp, #384]
-; LE-NEXT:    vmov.32 d15[1], r6
-; LE-NEXT:    ldr r6, [sp, #352]
-; LE-NEXT:    vmov.32 d14[1], r8
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #32
-; LE-NEXT:    vmov.32 d11[1], r1
-; LE-NEXT:    ldr r1, [sp, #60] @ 4-byte Reload
-; LE-NEXT:    vmov.32 d8[0], r11
-; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #176
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add lr, sp, #160
-; LE-NEXT:    add r3, sp, #356
-; LE-NEXT:    mov r5, r1
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    vmov.32 d16[0], r0
+; LE-NEXT:    add r7, sp, #164
+; LE-NEXT:    ldr r6, [sp, #160]
+; LE-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; LE-NEXT:    vmov.32 d9[0], r0
+; LE-NEXT:    ldm r7, {r1, r2, r3, r7}
 ; LE-NEXT:    mov r0, r6
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add lr, sp, #112
-; LE-NEXT:    add r3, sp, #388
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    mov r0, r10
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add lr, sp, #128
-; LE-NEXT:    add r3, sp, #580
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    mov r0, r4
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add lr, sp, #80
-; LE-NEXT:    add r3, sp, #708
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    ldr r0, [sp, #704]
+; LE-NEXT:    ldr r8, [sp, #128]
+; LE-NEXT:    ldr r9, [sp, #144]
 ; LE-NEXT:    bl llrintl
-; LE-NEXT:    vmov.32 d8[1], r4
-; LE-NEXT:    add lr, sp, #80
-; LE-NEXT:    ldr r2, [sp, #52] @ 4-byte Reload
-; LE-NEXT:    vmov.32 d12[1], r6
-; LE-NEXT:    ldr r6, [sp, #644]
-; LE-NEXT:    ldr r3, [sp, #652]
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #128
-; LE-NEXT:    vmov.32 d14[1], r7
-; LE-NEXT:    ldr r4, [sp, #480]
-; LE-NEXT:    ldr r7, [sp, #656]
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #112
-; LE-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #160
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-NEXT:    add r3, sp, #180
+; LE-NEXT:    str r1, [sp] @ 4-byte Spill
 ; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
-; LE-NEXT:    ldr r10, [sp, #496]
-; LE-NEXT:    vmov.32 d16[1], r5
-; LE-NEXT:    add r5, r9, #192
-; LE-NEXT:    ldr r8, [sp, #608]
-; LE-NEXT:    vmov.32 d10[1], r1
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #176
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vmov.32 d16[1], r0
-; LE-NEXT:    ldr r0, [sp, #640]
-; LE-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #96
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #8
-; LE-NEXT:    vmov.32 d16[1], r2
-; LE-NEXT:    ldr r2, [sp, #648]
-; LE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; LE-NEXT:    vst1.64 {d10, d11}, [r5:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; LE-NEXT:    ldr r1, [sp, #48] @ 4-byte Reload
-; LE-NEXT:    vmov.32 d9[0], r1
-; LE-NEXT:    mov r1, r6
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #660
-; LE-NEXT:    mov r11, r1
-; LE-NEXT:    vmov.32 d12[0], r0
 ; LE-NEXT:    mov r0, r7
 ; LE-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #484
+; LE-NEXT:    add r3, sp, #132
 ; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    mov r0, r4
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #500
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    vmov.32 d10[0], r0
-; LE-NEXT:    mov r0, r10
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #612
-; LE-NEXT:    mov r4, r1
 ; LE-NEXT:    vmov.32 d11[0], r0
 ; LE-NEXT:    mov r0, r8
 ; LE-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-NEXT:    bl llrintl
-; LE-NEXT:    vmov.32 d8[0], r0
-; LE-NEXT:    ldr r0, [sp, #64] @ 4-byte Reload
-; LE-NEXT:    add lr, sp, #96
-; LE-NEXT:    add r8, r9, #128
-; LE-NEXT:    vmov.32 d13[1], r7
-; LE-NEXT:    ldr r2, [sp, #344]
-; LE-NEXT:    ldr r3, [sp, #348]
-; LE-NEXT:    vmov.32 d12[1], r11
-; LE-NEXT:    ldr r7, [sp, #452]
-; LE-NEXT:    ldr r10, [sp, #416]
-; LE-NEXT:    vmov.32 d9[1], r0
-; LE-NEXT:    ldr r0, [sp, #336]
-; LE-NEXT:    vmov.32 d8[1], r1
-; LE-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #64
-; LE-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEXT:    add lr, sp, #32
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #144
-; LE-NEXT:    vmov.32 d11[1], r4
-; LE-NEXT:    ldr r4, [sp, #340]
-; LE-NEXT:    vst1.64 {d16, d17}, [r5:128]
-; LE-NEXT:    mov r1, r4
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #80
-; LE-NEXT:    vmov.32 d10[1], r6
-; LE-NEXT:    ldr r6, [sp, #448]
-; LE-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    ldr r2, [sp, #456]
-; LE-NEXT:    mov r11, r1
-; LE-NEXT:    ldr r3, [sp, #460]
-; LE-NEXT:    vmov.32 d15[0], r0
-; LE-NEXT:    mov r0, r6
-; LE-NEXT:    mov r1, r7
-; LE-NEXT:    ldr r5, [sp, #432]
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #468
+; LE-NEXT:    add r3, sp, #148
+; LE-NEXT:    mov r8, r1
 ; LE-NEXT:    vmov.32 d12[0], r0
-; LE-NEXT:    ldr r0, [sp, #464]
-; LE-NEXT:    mov r6, r1
-; LE-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #420
-; LE-NEXT:    mov r7, r1
-; LE-NEXT:    vmov.32 d13[0], r0
-; LE-NEXT:    mov r0, r10
+; LE-NEXT:    mov r0, r9
 ; LE-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #436
-; LE-NEXT:    mov r4, r1
-; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    mov r9, r1
+; LE-NEXT:    vmov.32 d13[0], r0
 ; LE-NEXT:    mov r0, r5
+; LE-NEXT:    mov r1, r4
+; LE-NEXT:    mov r2, r10
+; LE-NEXT:    mov r3, r11
+; LE-NEXT:    ldr r6, [sp, #112]
+; LE-NEXT:    bl llrintl
+; LE-NEXT:    add r3, sp, #116
+; LE-NEXT:    mov r4, r1
+; LE-NEXT:    vmov.32 d14[0], r0
+; LE-NEXT:    mov r0, r6
 ; LE-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-NEXT:    bl llrintl
-; LE-NEXT:    add r3, sp, #324
-; LE-NEXT:    vmov.32 d9[0], r0
-; LE-NEXT:    ldr r0, [sp, #320]
+; LE-NEXT:    add r3, sp, #196
+; LE-NEXT:    vmov.32 d15[0], r0
+; LE-NEXT:    ldr r0, [sp, #192]
 ; LE-NEXT:    mov r5, r1
 ; LE-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-NEXT:    bl llrintl
-; LE-NEXT:    add lr, sp, #64
-; LE-NEXT:    vmov.32 d9[1], r5
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #96
-; LE-NEXT:    vmov.32 d13[1], r7
-; LE-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #176
-; LE-NEXT:    vmov.32 d8[1], r4
-; LE-NEXT:    vmov.32 d12[1], r6
-; LE-NEXT:    vmov.32 d14[0], r0
-; LE-NEXT:    add r0, r9, #64
-; LE-NEXT:    vst1.64 {d16, d17}, [r8:128]
-; LE-NEXT:    vst1.64 {d8, d9}, [r0:128]!
-; LE-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-NEXT:    vst1.64 {d10, d11}, [r0:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #160
-; LE-NEXT:    vmov.32 d15[1], r11
-; LE-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #112
-; LE-NEXT:    vmov.32 d14[1], r1
-; LE-NEXT:    vst1.64 {d16, d17}, [r9:128]!
-; LE-NEXT:    vst1.64 {d14, d15}, [r9:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    add lr, sp, #128
-; LE-NEXT:    vst1.64 {d16, d17}, [r9:128]!
-; LE-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEXT:    vst1.64 {d16, d17}, [r9:128]
-; LE-NEXT:    add sp, sp, #192
+; LE-NEXT:    vmov.32 d8[0], r0
+; LE-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d11[1], r7
+; LE-NEXT:    vmov.32 d10[1], r0
+; LE-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; LE-NEXT:    vmov.32 d15[1], r5
+; LE-NEXT:    vorr q2, q5, q5
+; LE-NEXT:    vmov.32 d13[1], r9
+; LE-NEXT:    vmov.32 d9[1], r0
+; LE-NEXT:    vmov.32 d14[1], r4
+; LE-NEXT:    vmov.32 d12[1], r8
+; LE-NEXT:    vorr q0, q7, q7
+; LE-NEXT:    vmov.32 d8[1], r1
+; LE-NEXT:    vorr q1, q6, q6
+; LE-NEXT:    vorr q3, q4, q4
+; LE-NEXT:    add sp, sp, #8
 ; LE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-NEXT:    add sp, sp, #4
 ; LE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
-; LE-NEON-LABEL: llrint_v32f128:
-; LE-NEON:       @ %bb.0:
-; LE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-NEON-NEXT:    .pad #4
-; LE-NEON-NEXT:    sub sp, sp, #4
-; LE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    .pad #192
-; LE-NEON-NEXT:    sub sp, sp, #192
-; LE-NEON-NEXT:    str r3, [sp, #60] @ 4-byte Spill
-; LE-NEON-NEXT:    add r3, sp, #688
-; LE-NEON-NEXT:    str r2, [sp, #56] @ 4-byte Spill
-; LE-NEON-NEXT:    mov r9, r0
-; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #560
-; LE-NEON-NEXT:    mov r4, r0
-; LE-NEON-NEXT:    str r1, [sp, #64] @ 4-byte Spill
-; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-NEON-NEXT:    ldr r7, [sp, #544]
-; LE-NEON-NEXT:    ldr r6, [sp, #548]
-; LE-NEON-NEXT:    add lr, sp, #96
-; LE-NEON-NEXT:    ldr r2, [sp, #552]
-; LE-NEON-NEXT:    vmov.32 d17[1], r1
-; LE-NEON-NEXT:    ldr r3, [sp, #556]
-; LE-NEON-NEXT:    mov r0, r7
-; LE-NEON-NEXT:    mov r1, r6
-; LE-NEON-NEXT:    vorr q4, q8, q8
-; LE-NEON-NEXT:    ldr r5, [sp, #528]
-; LE-NEON-NEXT:    vmov.32 d17[0], r4
-; LE-NEON-NEXT:    ldr r10, [sp, #304]
-; LE-NEON-NEXT:    ldr r8, [sp, #368]
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #532
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    mov r11, r1
-; LE-NEON-NEXT:    add lr, sp, #144
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    mov r0, r5
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #308
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    mov r0, r10
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #372
-; LE-NEON-NEXT:    mov r10, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    mov r0, r8
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #404
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #400]
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #596
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #592]
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #676
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #672]
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add lr, sp, #96
-; LE-NEON-NEXT:    vmov.32 d13[1], r4
-; LE-NEON-NEXT:    str r1, [sp, #52] @ 4-byte Spill
-; LE-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #80
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #128
-; LE-NEON-NEXT:    vmov.32 d9[1], r7
-; LE-NEON-NEXT:    ldr r1, [sp, #628]
-; LE-NEON-NEXT:    ldr r2, [sp, #632]
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #112
-; LE-NEON-NEXT:    vmov.32 d15[1], r6
-; LE-NEON-NEXT:    ldr r3, [sp, #636]
-; LE-NEON-NEXT:    ldr r7, [sp, #64] @ 4-byte Reload
-; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    vmov.32 d11[1], r10
-; LE-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #144
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d18[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #624]
-; LE-NEON-NEXT:    vmov.32 d16[1], r11
-; LE-NEON-NEXT:    vmov.32 d9[1], r5
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #96
-; LE-NEON-NEXT:    vmov.32 d19[1], r7
-; LE-NEON-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #512
-; LE-NEON-NEXT:    str r0, [sp, #48] @ 4-byte Spill
-; LE-NEON-NEXT:    str r1, [sp, #64] @ 4-byte Spill
-; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #768
-; LE-NEON-NEXT:    mov r11, r0
-; LE-NEON-NEXT:    str r1, [sp, #28] @ 4-byte Spill
-; LE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    ldr r6, [sp, #784]
-; LE-NEON-NEXT:    add r3, sp, #788
-; LE-NEON-NEXT:    mov r8, r1
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    mov r0, r6
-; LE-NEON-NEXT:    ldr r5, [sp, #736]
-; LE-NEON-NEXT:    ldr r7, [sp, #752]
-; LE-NEON-NEXT:    ldr r4, [sp, #720]
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #740
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    mov r0, r5
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #756
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    mov r0, r7
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #724
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    mov r0, r4
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    vmov.32 d13[1], r7
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    ldr r2, [sp, #296]
-; LE-NEON-NEXT:    vmov.32 d12[1], r5
-; LE-NEON-NEXT:    ldr r3, [sp, #300]
-; LE-NEON-NEXT:    ldr r4, [sp, #576]
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
-; LE-NEON-NEXT:    ldr r10, [sp, #384]
-; LE-NEON-NEXT:    vmov.32 d15[1], r6
-; LE-NEON-NEXT:    ldr r6, [sp, #352]
-; LE-NEON-NEXT:    vmov.32 d14[1], r8
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #32
-; LE-NEON-NEXT:    vmov.32 d11[1], r1
-; LE-NEON-NEXT:    ldr r1, [sp, #60] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov.32 d8[0], r11
-; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    add r3, sp, #356
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    vmov.32 d16[0], r0
-; LE-NEON-NEXT:    mov r0, r6
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add lr, sp, #112
-; LE-NEON-NEXT:    add r3, sp, #388
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    mov r0, r10
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add lr, sp, #128
-; LE-NEON-NEXT:    add r3, sp, #580
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    mov r0, r4
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add lr, sp, #80
-; LE-NEON-NEXT:    add r3, sp, #708
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #704]
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    vmov.32 d8[1], r4
-; LE-NEON-NEXT:    add lr, sp, #80
-; LE-NEON-NEXT:    ldr r2, [sp, #52] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov.32 d12[1], r6
-; LE-NEON-NEXT:    ldr r6, [sp, #644]
-; LE-NEON-NEXT:    ldr r3, [sp, #652]
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #128
-; LE-NEON-NEXT:    vmov.32 d14[1], r7
-; LE-NEON-NEXT:    ldr r4, [sp, #480]
-; LE-NEON-NEXT:    ldr r7, [sp, #656]
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #112
-; LE-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
-; LE-NEON-NEXT:    ldr r10, [sp, #496]
-; LE-NEON-NEXT:    vmov.32 d16[1], r5
-; LE-NEON-NEXT:    add r5, r9, #192
-; LE-NEON-NEXT:    ldr r8, [sp, #608]
-; LE-NEON-NEXT:    vmov.32 d10[1], r1
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vmov.32 d16[1], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #640]
-; LE-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #96
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #8
-; LE-NEON-NEXT:    vmov.32 d16[1], r2
-; LE-NEON-NEXT:    ldr r2, [sp, #648]
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r5:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; LE-NEON-NEXT:    ldr r1, [sp, #48] @ 4-byte Reload
-; LE-NEON-NEXT:    vmov.32 d9[0], r1
-; LE-NEON-NEXT:    mov r1, r6
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #660
-; LE-NEON-NEXT:    mov r11, r1
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    mov r0, r7
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #484
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    mov r0, r4
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #500
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-NEON-NEXT:    mov r0, r10
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #612
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-NEON-NEXT:    mov r0, r8
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #64] @ 4-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #96
-; LE-NEON-NEXT:    add r8, r9, #128
-; LE-NEON-NEXT:    vmov.32 d13[1], r7
-; LE-NEON-NEXT:    ldr r2, [sp, #344]
-; LE-NEON-NEXT:    ldr r3, [sp, #348]
-; LE-NEON-NEXT:    vmov.32 d12[1], r11
-; LE-NEON-NEXT:    ldr r7, [sp, #452]
-; LE-NEON-NEXT:    ldr r10, [sp, #416]
-; LE-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #336]
-; LE-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #64
-; LE-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-NEON-NEXT:    add lr, sp, #32
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #144
-; LE-NEON-NEXT:    vmov.32 d11[1], r4
-; LE-NEON-NEXT:    ldr r4, [sp, #340]
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]
-; LE-NEON-NEXT:    mov r1, r4
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #80
-; LE-NEON-NEXT:    vmov.32 d10[1], r6
-; LE-NEON-NEXT:    ldr r6, [sp, #448]
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    ldr r2, [sp, #456]
-; LE-NEON-NEXT:    mov r11, r1
-; LE-NEON-NEXT:    ldr r3, [sp, #460]
-; LE-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-NEON-NEXT:    mov r0, r6
-; LE-NEON-NEXT:    mov r1, r7
-; LE-NEON-NEXT:    ldr r5, [sp, #432]
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #468
-; LE-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #464]
-; LE-NEON-NEXT:    mov r6, r1
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #420
-; LE-NEON-NEXT:    mov r7, r1
-; LE-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-NEON-NEXT:    mov r0, r10
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #436
-; LE-NEON-NEXT:    mov r4, r1
-; LE-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-NEON-NEXT:    mov r0, r5
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add r3, sp, #324
-; LE-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-NEON-NEXT:    ldr r0, [sp, #320]
-; LE-NEON-NEXT:    mov r5, r1
-; LE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-NEON-NEXT:    bl llrintl
-; LE-NEON-NEXT:    add lr, sp, #64
-; LE-NEON-NEXT:    vmov.32 d9[1], r5
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #96
-; LE-NEON-NEXT:    vmov.32 d13[1], r7
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #176
-; LE-NEON-NEXT:    vmov.32 d8[1], r4
-; LE-NEON-NEXT:    vmov.32 d12[1], r6
-; LE-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-NEON-NEXT:    add r0, r9, #64
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]
-; LE-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #160
-; LE-NEON-NEXT:    vmov.32 d15[1], r11
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #112
-; LE-NEON-NEXT:    vmov.32 d14[1], r1
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]!
-; LE-NEON-NEXT:    vst1.64 {d14, d15}, [r9:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    add lr, sp, #128
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]!
-; LE-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]
-; LE-NEON-NEXT:    add sp, sp, #192
-; LE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-NEON-NEXT:    add sp, sp, #4
-; LE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-LABEL: llrint_v32f128:
+; BE-LABEL: llrint_v8i64_v8f128:
 ; BE:       @ %bb.0:
 ; BE-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; BE-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; BE-NEXT:    .pad #4
 ; BE-NEXT:    sub sp, sp, #4
-; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEXT:    .pad #152
-; BE-NEXT:    sub sp, sp, #152
-; BE-NEXT:    str r3, [sp, #120] @ 4-byte Spill
-; BE-NEXT:    add r3, sp, #712
-; BE-NEXT:    str r2, [sp, #112] @ 4-byte Spill
-; BE-NEXT:    mov r9, r0
-; BE-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    ldr r7, [sp, #648]
-; BE-NEXT:    add r3, sp, #652
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vmov.32 d9[0], r0
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    mov r0, r7
-; BE-NEXT:    ldr r6, [sp, #520]
-; BE-NEXT:    ldr r8, [sp, #632]
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #524
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    mov r0, r6
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #636
+; BE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-NEXT:    .pad #16
+; BE-NEXT:    sub sp, sp, #16
+; BE-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; BE-NEXT:    add r3, sp, #208
+; BE-NEXT:    mov r11, r2
 ; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    mov r0, r8
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    ldr r0, [sp, #488]
-; BE-NEXT:    vmov.32 d8[1], r4
-; BE-NEXT:    ldr r1, [sp, #492]
-; BE-NEXT:    ldr r2, [sp, #496]
-; BE-NEXT:    vmov.32 d10[1], r7
-; BE-NEXT:    ldr r3, [sp, #500]
-; BE-NEXT:    vmov.32 d9[1], r5
-; BE-NEXT:    vstr d8, [sp, #144] @ 8-byte Spill
-; BE-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
-; BE-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #680
-; BE-NEXT:    str r0, [sp, #104] @ 4-byte Spill
-; BE-NEXT:    str r1, [sp, #88] @ 4-byte Spill
+; BE-NEXT:    mov r5, r0
 ; BE-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; BE-NEXT:    bl llrintl
-; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    ldr r0, [sp, #728]
-; BE-NEXT:    ldr r2, [sp, #736]
-; BE-NEXT:    vmov.32 d11[1], r6
-; BE-NEXT:    ldr r6, [sp, #732]
-; BE-NEXT:    ldr r3, [sp, #740]
-; BE-NEXT:    vmov.32 d16[1], r1
-; BE-NEXT:    ldr r5, [sp, #504]
-; BE-NEXT:    mov r1, r6
-; BE-NEXT:    ldr r7, [sp, #744]
-; BE-NEXT:    ldr r4, [sp, #748]
-; BE-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
-; BE-NEXT:    vstr d16, [sp, #8] @ 8-byte Spill
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    ldr r2, [sp, #752]
-; BE-NEXT:    mov r11, r1
-; BE-NEXT:    ldr r3, [sp, #756]
-; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    mov r0, r7
-; BE-NEXT:    mov r1, r4
-; BE-NEXT:    ldr r10, [sp, #552]
-; BE-NEXT:    ldr r6, [sp, #664]
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #508
-; BE-NEXT:    mov r8, r1
+; BE-NEXT:    ldr r7, [sp, #176]
+; BE-NEXT:    add r3, sp, #180
+; BE-NEXT:    str r1, [sp, #12] @ 4-byte Spill
 ; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    mov r0, r5
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #540
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    ldr r0, [sp, #536]
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #556
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    mov r0, r10
 ; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    mov r0, r7
+; BE-NEXT:    ldr r6, [sp, #128]
+; BE-NEXT:    ldr r8, [sp, #144]
 ; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #668
-; BE-NEXT:    mov r4, r1
+; BE-NEXT:    add r3, sp, #132
+; BE-NEXT:    str r1, [sp, #8] @ 4-byte Spill
 ; BE-NEXT:    vmov.32 d9[0], r0
 ; BE-NEXT:    mov r0, r6
 ; BE-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #700
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    ldr r0, [sp, #696]
+; BE-NEXT:    add r3, sp, #148
 ; BE-NEXT:    mov r6, r1
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    vmov.32 d11[0], r0
-; BE-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
-; BE-NEXT:    ldr r2, [sp, #256]
-; BE-NEXT:    vmov.32 d13[1], r11
-; BE-NEXT:    ldr r3, [sp, #260]
-; BE-NEXT:    vmov.32 d14[1], r6
-; BE-NEXT:    ldr r6, [sp, #264]
-; BE-NEXT:    vmov.32 d9[1], r4
-; BE-NEXT:    ldr r4, [sp, #344]
-; BE-NEXT:    vmov.32 d12[1], r5
-; BE-NEXT:    ldr r5, [sp, #312]
-; BE-NEXT:    vmov.32 d8[1], r8
-; BE-NEXT:    ldr r8, [sp, #328]
-; BE-NEXT:    vmov.32 d10[1], r7
-; BE-NEXT:    vstr d13, [sp, #32] @ 8-byte Spill
-; BE-NEXT:    vmov.32 d11[1], r1
-; BE-NEXT:    ldr r1, [sp, #120] @ 4-byte Reload
-; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
-; BE-NEXT:    vstr d14, [sp] @ 8-byte Spill
-; BE-NEXT:    vstr d9, [sp, #16] @ 8-byte Spill
-; BE-NEXT:    vstr d12, [sp, #56] @ 8-byte Spill
-; BE-NEXT:    vstr d10, [sp, #64] @ 8-byte Spill
-; BE-NEXT:    vstr d8, [sp, #40] @ 8-byte Spill
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #268
-; BE-NEXT:    mov r11, r1
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    mov r0, r6
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #316
-; BE-NEXT:    mov r10, r1
-; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    mov r0, r5
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #332
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    mov r0, r8
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #348
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    mov r0, r4
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #364
-; BE-NEXT:    vmov.32 d9[0], r0
-; BE-NEXT:    ldr r0, [sp, #360]
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #476
 ; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    ldr r0, [sp, #472]
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    vmov.32 d16[0], r0
-; BE-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
-; BE-NEXT:    ldr r2, [sp, #592]
-; BE-NEXT:    vldr d20, [sp, #136] @ 8-byte Reload
-; BE-NEXT:    vmov.32 d16[1], r1
-; BE-NEXT:    ldr r1, [sp, #588]
-; BE-NEXT:    ldr r3, [sp, #596]
-; BE-NEXT:    vldr d22, [sp, #24] @ 8-byte Reload
-; BE-NEXT:    vldr d18, [sp, #8] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d21, d20
-; BE-NEXT:    vmov.32 d10[1], r6
-; BE-NEXT:    ldr r6, [sp, #600]
-; BE-NEXT:    vmov.32 d9[1], r4
-; BE-NEXT:    ldr r4, [sp, #616]
-; BE-NEXT:    vmov.32 d12[1], r7
-; BE-NEXT:    ldr r7, [sp, #604]
-; BE-NEXT:    vmov.32 d8[1], r10
-; BE-NEXT:    add r10, r9, #192
-; BE-NEXT:    vmov.32 d14[1], r11
-; BE-NEXT:    ldr r11, [sp, #440]
-; BE-NEXT:    vmov.32 d13[1], r0
-; BE-NEXT:    ldr r0, [sp, #584]
-; BE-NEXT:    vmov.32 d15[1], r5
-; BE-NEXT:    vstr d16, [sp, #48] @ 8-byte Spill
-; BE-NEXT:    vldr d16, [sp, #128] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d20, d22
-; BE-NEXT:    vldr d22, [sp] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d19, d18
-; BE-NEXT:    vrev64.32 d17, d16
-; BE-NEXT:    vrev64.32 d18, d22
-; BE-NEXT:    vstr d10, [sp, #120] @ 8-byte Spill
-; BE-NEXT:    vstr d9, [sp, #112] @ 8-byte Spill
-; BE-NEXT:    vstr d15, [sp, #104] @ 8-byte Spill
-; BE-NEXT:    vstr d12, [sp, #96] @ 8-byte Spill
-; BE-NEXT:    vstr d8, [sp, #80] @ 8-byte Spill
-; BE-NEXT:    vstr d14, [sp, #72] @ 8-byte Spill
-; BE-NEXT:    vstr d13, [sp, #88] @ 8-byte Spill
-; BE-NEXT:    vst1.64 {d20, d21}, [r10:128]!
-; BE-NEXT:    vrev64.32 d16, d11
-; BE-NEXT:    vst1.64 {d18, d19}, [r10:128]!
-; BE-NEXT:    vst1.64 {d16, d17}, [r10:128]!
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    ldr r2, [sp, #608]
-; BE-NEXT:    mov r8, r1
-; BE-NEXT:    ldr r3, [sp, #612]
-; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    mov r0, r6
-; BE-NEXT:    mov r1, r7
-; BE-NEXT:    ldr r5, [sp, #456]
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #620
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vmov.32 d9[0], r0
-; BE-NEXT:    mov r0, r4
+; BE-NEXT:    mov r0, r8
 ; BE-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #444
+; BE-NEXT:    add r3, sp, #160
+; BE-NEXT:    mov r9, r0
 ; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    mov r0, r11
-; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #460
-; BE-NEXT:    mov r4, r1
+; BE-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; BE-NEXT:    mov r8, r1
 ; BE-NEXT:    vmov.32 d11[0], r0
 ; BE-NEXT:    mov r0, r5
-; BE-NEXT:    ldm r3, {r1, r2, r3}
+; BE-NEXT:    mov r1, r4
+; BE-NEXT:    mov r2, r11
+; BE-NEXT:    ldr r10, [sp, #112]
+; BE-NEXT:    vmov.32 d12[0], r9
 ; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #572
+; BE-NEXT:    add r3, sp, #116
+; BE-NEXT:    mov r4, r1
 ; BE-NEXT:    vmov.32 d13[0], r0
-; BE-NEXT:    ldr r0, [sp, #568]
-; BE-NEXT:    mov r5, r1
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    vldr d16, [sp, #16] @ 8-byte Reload
-; BE-NEXT:    vldr d18, [sp, #56] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d17, d16
-; BE-NEXT:    ldr r2, [sp, #304]
-; BE-NEXT:    vrev64.32 d16, d18
-; BE-NEXT:    ldr r3, [sp, #308]
-; BE-NEXT:    vldr d18, [sp, #144] @ 8-byte Reload
-; BE-NEXT:    vldr d20, [sp, #64] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d19, d18
-; BE-NEXT:    vrev64.32 d18, d20
-; BE-NEXT:    vldr d20, [sp, #40] @ 8-byte Reload
-; BE-NEXT:    vldr d22, [sp, #32] @ 8-byte Reload
-; BE-NEXT:    vmov.32 d14[0], r0
-; BE-NEXT:    ldr r0, [sp, #296]
-; BE-NEXT:    vmov.32 d10[1], r7
-; BE-NEXT:    ldr r7, [sp, #412]
-; BE-NEXT:    vmov.32 d9[1], r6
-; BE-NEXT:    ldr r6, [sp, #408]
-; BE-NEXT:    vmov.32 d8[1], r8
-; BE-NEXT:    add r8, r9, #128
-; BE-NEXT:    vrev64.32 d21, d20
-; BE-NEXT:    vmov.32 d13[1], r5
-; BE-NEXT:    ldr r5, [sp, #300]
-; BE-NEXT:    vrev64.32 d20, d22
-; BE-NEXT:    vmov.32 d14[1], r1
-; BE-NEXT:    mov r1, r5
-; BE-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
-; BE-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
-; BE-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
-; BE-NEXT:    vst1.64 {d20, d21}, [r10:128]
-; BE-NEXT:    vst1.64 {d18, d19}, [r8:128]!
-; BE-NEXT:    vmov.32 d11[1], r4
-; BE-NEXT:    ldr r4, [sp, #424]
-; BE-NEXT:    ldr r10, [sp, #376]
-; BE-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    ldr r2, [sp, #416]
-; BE-NEXT:    mov r11, r1
-; BE-NEXT:    ldr r3, [sp, #420]
-; BE-NEXT:    vmov.32 d15[0], r0
-; BE-NEXT:    mov r0, r6
-; BE-NEXT:    mov r1, r7
-; BE-NEXT:    ldr r5, [sp, #392]
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #428
-; BE-NEXT:    mov r6, r1
-; BE-NEXT:    vmov.32 d8[0], r0
-; BE-NEXT:    mov r0, r4
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #380
-; BE-NEXT:    mov r7, r1
-; BE-NEXT:    vmov.32 d9[0], r0
 ; BE-NEXT:    mov r0, r10
 ; BE-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #396
-; BE-NEXT:    mov r4, r1
-; BE-NEXT:    vmov.32 d12[0], r0
-; BE-NEXT:    mov r0, r5
-; BE-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEXT:    bl llrintl
-; BE-NEXT:    add r3, sp, #284
-; BE-NEXT:    vmov.32 d10[0], r0
-; BE-NEXT:    ldr r0, [sp, #280]
+; BE-NEXT:    add r3, sp, #196
+; BE-NEXT:    vmov.32 d14[0], r0
+; BE-NEXT:    ldr r0, [sp, #192]
 ; BE-NEXT:    mov r5, r1
 ; BE-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-NEXT:    bl llrintl
-; BE-NEXT:    vldr d16, [sp, #120] @ 8-byte Reload
-; BE-NEXT:    vldr d18, [sp, #112] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d17, d16
-; BE-NEXT:    vldr d26, [sp, #136] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d16, d18
-; BE-NEXT:    vldr d18, [sp, #104] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d31, d26
-; BE-NEXT:    vldr d26, [sp, #128] @ 8-byte Reload
-; BE-NEXT:    vldr d20, [sp, #96] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d19, d18
-; BE-NEXT:    vrev64.32 d18, d20
-; BE-NEXT:    vldr d20, [sp, #80] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d30, d26
-; BE-NEXT:    vldr d26, [sp, #24] @ 8-byte Reload
-; BE-NEXT:    vmov.32 d10[1], r5
-; BE-NEXT:    vldr d22, [sp, #72] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d21, d20
-; BE-NEXT:    vrev64.32 d1, d26
-; BE-NEXT:    vmov.32 d9[1], r7
-; BE-NEXT:    vmov.32 d12[1], r4
-; BE-NEXT:    vrev64.32 d20, d22
-; BE-NEXT:    vldr d22, [sp, #88] @ 8-byte Reload
-; BE-NEXT:    vmov.32 d8[1], r6
-; BE-NEXT:    vrev64.32 d0, d14
-; BE-NEXT:    vmov.32 d28[0], r0
-; BE-NEXT:    add r0, r9, #64
-; BE-NEXT:    vrev64.32 d3, d10
-; BE-NEXT:    vldr d24, [sp, #48] @ 8-byte Reload
-; BE-NEXT:    vrev64.32 d23, d22
+; BE-NEXT:    vmov.32 d16[0], r0
+; BE-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; BE-NEXT:    vmov.32 d14[1], r5
+; BE-NEXT:    vmov.32 d9[1], r0
+; BE-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; BE-NEXT:    vmov.32 d12[1], r7
+; BE-NEXT:    vmov.32 d8[1], r0
+; BE-NEXT:    vmov.32 d13[1], r4
+; BE-NEXT:    vmov.32 d10[1], r6
+; BE-NEXT:    vmov.32 d11[1], r8
+; BE-NEXT:    vmov.32 d16[1], r1
+; BE-NEXT:    vrev64.32 d1, d14
+; BE-NEXT:    vrev64.32 d3, d12
 ; BE-NEXT:    vrev64.32 d5, d9
-; BE-NEXT:    vst1.64 {d0, d1}, [r8:128]!
-; BE-NEXT:    vrev64.32 d2, d12
-; BE-NEXT:    vmov.32 d15[1], r11
-; BE-NEXT:    vrev64.32 d22, d24
-; BE-NEXT:    vrev64.32 d25, d13
-; BE-NEXT:    vrev64.32 d4, d8
-; BE-NEXT:    vst1.64 {d30, d31}, [r8:128]
-; BE-NEXT:    vst1.64 {d2, d3}, [r0:128]!
-; BE-NEXT:    vmov.32 d28[1], r1
-; BE-NEXT:    vrev64.32 d24, d11
-; BE-NEXT:    vst1.64 {d4, d5}, [r0:128]!
-; BE-NEXT:    vrev64.32 d27, d15
-; BE-NEXT:    vst1.64 {d24, d25}, [r0:128]!
-; BE-NEXT:    vrev64.32 d26, d28
-; BE-NEXT:    vst1.64 {d22, d23}, [r0:128]
-; BE-NEXT:    vst1.64 {d20, d21}, [r9:128]!
-; BE-NEXT:    vst1.64 {d26, d27}, [r9:128]!
-; BE-NEXT:    vst1.64 {d18, d19}, [r9:128]!
-; BE-NEXT:    vst1.64 {d16, d17}, [r9:128]
-; BE-NEXT:    add sp, sp, #152
-; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-NEXT:    vrev64.32 d7, d8
+; BE-NEXT:    vrev64.32 d0, d13
+; BE-NEXT:    vrev64.32 d2, d10
+; BE-NEXT:    vrev64.32 d4, d11
+; BE-NEXT:    vrev64.32 d6, d16
+; BE-NEXT:    add sp, sp, #16
+; BE-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
 ; BE-NEXT:    add sp, sp, #4
 ; BE-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-NEON-LABEL: llrint_v32f128:
-; BE-NEON:       @ %bb.0:
-; BE-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-NEON-NEXT:    .pad #4
-; BE-NEON-NEXT:    sub sp, sp, #4
-; BE-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    .pad #152
-; BE-NEON-NEXT:    sub sp, sp, #152
-; BE-NEON-NEXT:    str r3, [sp, #120] @ 4-byte Spill
-; BE-NEON-NEXT:    add r3, sp, #712
-; BE-NEON-NEXT:    str r2, [sp, #112] @ 4-byte Spill
-; BE-NEON-NEXT:    mov r9, r0
-; BE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    ldr r7, [sp, #648]
-; BE-NEON-NEXT:    add r3, sp, #652
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    mov r0, r7
-; BE-NEON-NEXT:    ldr r6, [sp, #520]
-; BE-NEON-NEXT:    ldr r8, [sp, #632]
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #524
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    mov r0, r6
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #636
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    mov r0, r8
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #488]
-; BE-NEON-NEXT:    vmov.32 d8[1], r4
-; BE-NEON-NEXT:    ldr r1, [sp, #492]
-; BE-NEON-NEXT:    ldr r2, [sp, #496]
-; BE-NEON-NEXT:    vmov.32 d10[1], r7
-; BE-NEON-NEXT:    ldr r3, [sp, #500]
-; BE-NEON-NEXT:    vmov.32 d9[1], r5
-; BE-NEON-NEXT:    vstr d8, [sp, #144] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #680
-; BE-NEON-NEXT:    str r0, [sp, #104] @ 4-byte Spill
-; BE-NEON-NEXT:    str r1, [sp, #88] @ 4-byte Spill
-; BE-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #728]
-; BE-NEON-NEXT:    ldr r2, [sp, #736]
-; BE-NEON-NEXT:    vmov.32 d11[1], r6
-; BE-NEON-NEXT:    ldr r6, [sp, #732]
-; BE-NEON-NEXT:    ldr r3, [sp, #740]
-; BE-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-NEON-NEXT:    ldr r5, [sp, #504]
-; BE-NEON-NEXT:    mov r1, r6
-; BE-NEON-NEXT:    ldr r7, [sp, #744]
-; BE-NEON-NEXT:    ldr r4, [sp, #748]
-; BE-NEON-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d16, [sp, #8] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    ldr r2, [sp, #752]
-; BE-NEON-NEXT:    mov r11, r1
-; BE-NEON-NEXT:    ldr r3, [sp, #756]
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    mov r0, r7
-; BE-NEON-NEXT:    mov r1, r4
-; BE-NEON-NEXT:    ldr r10, [sp, #552]
-; BE-NEON-NEXT:    ldr r6, [sp, #664]
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #508
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    mov r0, r5
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #540
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #536]
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #556
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    mov r0, r10
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #668
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    mov r0, r6
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #700
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #696]
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
-; BE-NEON-NEXT:    ldr r2, [sp, #256]
-; BE-NEON-NEXT:    vmov.32 d13[1], r11
-; BE-NEON-NEXT:    ldr r3, [sp, #260]
-; BE-NEON-NEXT:    vmov.32 d14[1], r6
-; BE-NEON-NEXT:    ldr r6, [sp, #264]
-; BE-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-NEON-NEXT:    ldr r4, [sp, #344]
-; BE-NEON-NEXT:    vmov.32 d12[1], r5
-; BE-NEON-NEXT:    ldr r5, [sp, #312]
-; BE-NEON-NEXT:    vmov.32 d8[1], r8
-; BE-NEON-NEXT:    ldr r8, [sp, #328]
-; BE-NEON-NEXT:    vmov.32 d10[1], r7
-; BE-NEON-NEXT:    vstr d13, [sp, #32] @ 8-byte Spill
-; BE-NEON-NEXT:    vmov.32 d11[1], r1
-; BE-NEON-NEXT:    ldr r1, [sp, #120] @ 4-byte Reload
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
-; BE-NEON-NEXT:    vstr d14, [sp] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d9, [sp, #16] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d12, [sp, #56] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d10, [sp, #64] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d8, [sp, #40] @ 8-byte Spill
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #268
-; BE-NEON-NEXT:    mov r11, r1
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    mov r0, r6
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #316
-; BE-NEON-NEXT:    mov r10, r1
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    mov r0, r5
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #332
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    mov r0, r8
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #348
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    mov r0, r4
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #364
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #360]
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #476
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #472]
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
-; BE-NEON-NEXT:    ldr r2, [sp, #592]
-; BE-NEON-NEXT:    vldr d20, [sp, #136] @ 8-byte Reload
-; BE-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-NEON-NEXT:    ldr r1, [sp, #588]
-; BE-NEON-NEXT:    ldr r3, [sp, #596]
-; BE-NEON-NEXT:    vldr d22, [sp, #24] @ 8-byte Reload
-; BE-NEON-NEXT:    vldr d18, [sp, #8] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d21, d20
-; BE-NEON-NEXT:    vmov.32 d10[1], r6
-; BE-NEON-NEXT:    ldr r6, [sp, #600]
-; BE-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-NEON-NEXT:    ldr r4, [sp, #616]
-; BE-NEON-NEXT:    vmov.32 d12[1], r7
-; BE-NEON-NEXT:    ldr r7, [sp, #604]
-; BE-NEON-NEXT:    vmov.32 d8[1], r10
-; BE-NEON-NEXT:    add r10, r9, #192
-; BE-NEON-NEXT:    vmov.32 d14[1], r11
-; BE-NEON-NEXT:    ldr r11, [sp, #440]
-; BE-NEON-NEXT:    vmov.32 d13[1], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #584]
-; BE-NEON-NEXT:    vmov.32 d15[1], r5
-; BE-NEON-NEXT:    vstr d16, [sp, #48] @ 8-byte Spill
-; BE-NEON-NEXT:    vldr d16, [sp, #128] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d20, d22
-; BE-NEON-NEXT:    vldr d22, [sp] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d19, d18
-; BE-NEON-NEXT:    vrev64.32 d17, d16
-; BE-NEON-NEXT:    vrev64.32 d18, d22
-; BE-NEON-NEXT:    vstr d10, [sp, #120] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d9, [sp, #112] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d15, [sp, #104] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d12, [sp, #96] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d8, [sp, #80] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d14, [sp, #72] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d13, [sp, #88] @ 8-byte Spill
-; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r10:128]!
-; BE-NEON-NEXT:    vrev64.32 d16, d11
-; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r10:128]!
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]!
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    ldr r2, [sp, #608]
-; BE-NEON-NEXT:    mov r8, r1
-; BE-NEON-NEXT:    ldr r3, [sp, #612]
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    mov r0, r6
-; BE-NEON-NEXT:    mov r1, r7
-; BE-NEON-NEXT:    ldr r5, [sp, #456]
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #620
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    mov r0, r4
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #444
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    mov r0, r11
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #460
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-NEON-NEXT:    mov r0, r5
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #572
-; BE-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #568]
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    vldr d16, [sp, #16] @ 8-byte Reload
-; BE-NEON-NEXT:    vldr d18, [sp, #56] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d17, d16
-; BE-NEON-NEXT:    ldr r2, [sp, #304]
-; BE-NEON-NEXT:    vrev64.32 d16, d18
-; BE-NEON-NEXT:    ldr r3, [sp, #308]
-; BE-NEON-NEXT:    vldr d18, [sp, #144] @ 8-byte Reload
-; BE-NEON-NEXT:    vldr d20, [sp, #64] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d19, d18
-; BE-NEON-NEXT:    vrev64.32 d18, d20
-; BE-NEON-NEXT:    vldr d20, [sp, #40] @ 8-byte Reload
-; BE-NEON-NEXT:    vldr d22, [sp, #32] @ 8-byte Reload
-; BE-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #296]
-; BE-NEON-NEXT:    vmov.32 d10[1], r7
-; BE-NEON-NEXT:    ldr r7, [sp, #412]
-; BE-NEON-NEXT:    vmov.32 d9[1], r6
-; BE-NEON-NEXT:    ldr r6, [sp, #408]
-; BE-NEON-NEXT:    vmov.32 d8[1], r8
-; BE-NEON-NEXT:    add r8, r9, #128
-; BE-NEON-NEXT:    vrev64.32 d21, d20
-; BE-NEON-NEXT:    vmov.32 d13[1], r5
-; BE-NEON-NEXT:    ldr r5, [sp, #300]
-; BE-NEON-NEXT:    vrev64.32 d20, d22
-; BE-NEON-NEXT:    vmov.32 d14[1], r1
-; BE-NEON-NEXT:    mov r1, r5
-; BE-NEON-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
-; BE-NEON-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
-; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r10:128]
-; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r8:128]!
-; BE-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-NEON-NEXT:    ldr r4, [sp, #424]
-; BE-NEON-NEXT:    ldr r10, [sp, #376]
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    ldr r2, [sp, #416]
-; BE-NEON-NEXT:    mov r11, r1
-; BE-NEON-NEXT:    ldr r3, [sp, #420]
-; BE-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-NEON-NEXT:    mov r0, r6
-; BE-NEON-NEXT:    mov r1, r7
-; BE-NEON-NEXT:    ldr r5, [sp, #392]
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #428
-; BE-NEON-NEXT:    mov r6, r1
-; BE-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-NEON-NEXT:    mov r0, r4
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #380
-; BE-NEON-NEXT:    mov r7, r1
-; BE-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-NEON-NEXT:    mov r0, r10
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #396
-; BE-NEON-NEXT:    mov r4, r1
-; BE-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-NEON-NEXT:    mov r0, r5
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    add r3, sp, #284
-; BE-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-NEON-NEXT:    ldr r0, [sp, #280]
-; BE-NEON-NEXT:    mov r5, r1
-; BE-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-NEON-NEXT:    bl llrintl
-; BE-NEON-NEXT:    vldr d16, [sp, #120] @ 8-byte Reload
-; BE-NEON-NEXT:    vldr d18, [sp, #112] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d17, d16
-; BE-NEON-NEXT:    vldr d26, [sp, #136] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d16, d18
-; BE-NEON-NEXT:    vldr d18, [sp, #104] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d31, d26
-; BE-NEON-NEXT:    vldr d26, [sp, #128] @ 8-byte Reload
-; BE-NEON-NEXT:    vldr d20, [sp, #96] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d19, d18
-; BE-NEON-NEXT:    vrev64.32 d18, d20
-; BE-NEON-NEXT:    vldr d20, [sp, #80] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d30, d26
-; BE-NEON-NEXT:    vldr d26, [sp, #24] @ 8-byte Reload
-; BE-NEON-NEXT:    vmov.32 d10[1], r5
-; BE-NEON-NEXT:    vldr d22, [sp, #72] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d21, d20
-; BE-NEON-NEXT:    vrev64.32 d1, d26
-; BE-NEON-NEXT:    vmov.32 d9[1], r7
-; BE-NEON-NEXT:    vmov.32 d12[1], r4
-; BE-NEON-NEXT:    vrev64.32 d20, d22
-; BE-NEON-NEXT:    vldr d22, [sp, #88] @ 8-byte Reload
-; BE-NEON-NEXT:    vmov.32 d8[1], r6
-; BE-NEON-NEXT:    vrev64.32 d0, d14
-; BE-NEON-NEXT:    vmov.32 d28[0], r0
-; BE-NEON-NEXT:    add r0, r9, #64
-; BE-NEON-NEXT:    vrev64.32 d3, d10
-; BE-NEON-NEXT:    vldr d24, [sp, #48] @ 8-byte Reload
-; BE-NEON-NEXT:    vrev64.32 d23, d22
-; BE-NEON-NEXT:    vrev64.32 d5, d9
-; BE-NEON-NEXT:    vst1.64 {d0, d1}, [r8:128]!
-; BE-NEON-NEXT:    vrev64.32 d2, d12
-; BE-NEON-NEXT:    vmov.32 d15[1], r11
-; BE-NEON-NEXT:    vrev64.32 d22, d24
-; BE-NEON-NEXT:    vrev64.32 d25, d13
-; BE-NEON-NEXT:    vrev64.32 d4, d8
-; BE-NEON-NEXT:    vst1.64 {d30, d31}, [r8:128]
-; BE-NEON-NEXT:    vst1.64 {d2, d3}, [r0:128]!
-; BE-NEON-NEXT:    vmov.32 d28[1], r1
-; BE-NEON-NEXT:    vrev64.32 d24, d11
-; BE-NEON-NEXT:    vst1.64 {d4, d5}, [r0:128]!
-; BE-NEON-NEXT:    vrev64.32 d27, d15
-; BE-NEON-NEXT:    vst1.64 {d24, d25}, [r0:128]!
-; BE-NEON-NEXT:    vrev64.32 d26, d28
-; BE-NEON-NEXT:    vst1.64 {d22, d23}, [r0:128]
-; BE-NEON-NEXT:    vst1.64 {d20, d21}, [r9:128]!
-; BE-NEON-NEXT:    vst1.64 {d26, d27}, [r9:128]!
-; BE-NEON-NEXT:    vst1.64 {d18, d19}, [r9:128]!
-; BE-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]
-; BE-NEON-NEXT:    add sp, sp, #152
-; BE-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-NEON-NEXT:    add sp, sp, #4
-; BE-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-  %a = call <32 x i64> @llvm.llrint.v32i64.v16f128(<32 x fp128> %x)
-  ret <32 x i64> %a
+  %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x)
+  ret <8 x i64> %a
 }
-declare <32 x i64> @llvm.llrint.v32i64.v32f128(<32 x fp128>)
+declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>)
diff --git a/llvm/test/CodeGen/ARM/vector-lrint.ll b/llvm/test/CodeGen/ARM/vector-lrint.ll
index 50c8b9ff6d913..fe5e3cbcdf771 100644
--- a/llvm/test/CodeGen/ARM/vector-lrint.ll
+++ b/llvm/test/CodeGen/ARM/vector-lrint.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf                | FileCheck %s --check-prefix=LE-I32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf                | FileCheck %s --check-prefix=LE-I64
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon   | FileCheck %s --check-prefix=LE-I32-NEON
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon   | FileCheck %s --check-prefix=LE-I64-NEON
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf              | FileCheck %s --check-prefix=BE-I32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf              | FileCheck %s --check-prefix=BE-I64
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-I32-NEON
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefix=BE-I64-NEON
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf                | FileCheck %s --check-prefixes=LE-I32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf                | FileCheck %s --check-prefixes=LE-I64
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon   | FileCheck %s --check-prefixes=LE-I32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armv7-unknown-none-eabihf -mattr=+neon   | FileCheck %s --check-prefixes=LE-I64
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf              | FileCheck %s --check-prefixes=BE-I32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf              | FileCheck %s --check-prefixes=BE-I64
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I64
 
 ; FIXME: crash "Do not know how to soft promote this operator's operand!"
 ; define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
@@ -40,12 +40,6 @@
 ; }
 ; declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half>)
 
-; define <32 x iXLen> @lrint_v32f16(<32 x half> %x) {
-;   %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half> %x)
-;   ret <32 x iXLen> %a
-; }
-; declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f16(<32 x half>)
-
 define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 ; LE-I32-LABEL: lrint_v1f32:
 ; LE-I32:       @ %bb.0:
@@ -63,22 +57,6 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 ; LE-I64-NEXT:    vmov.32 d0[1], r1
 ; LE-I64-NEXT:    pop {r11, pc}
 ;
-; LE-I32-NEON-LABEL: lrint_v1f32:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r11, lr}
-; LE-I32-NEON-NEXT:    push {r11, lr}
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v1f32:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r11, lr}
-; LE-I64-NEON-NEXT:    push {r11, lr}
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.32 d0[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d0[1], r1
-; LE-I64-NEON-NEXT:    pop {r11, pc}
-;
 ; BE-I32-LABEL: lrint_v1f32:
 ; BE-I32:       @ %bb.0:
 ; BE-I32-NEXT:    .save {r11, lr}
@@ -95,23 +73,6 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 ; BE-I64-NEXT:    vmov.32 d16[1], r1
 ; BE-I64-NEXT:    vrev64.32 d0, d16
 ; BE-I64-NEXT:    pop {r11, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v1f32:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r11, lr}
-; BE-I32-NEON-NEXT:    push {r11, lr}
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v1f32:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r11, lr}
-; BE-I64-NEON-NEXT:    push {r11, lr}
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 d0, d16
-; BE-I64-NEON-NEXT:    pop {r11, pc}
   %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x)
   ret <1 x iXLen> %a
 }
@@ -157,45 +118,6 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
 ; LE-I64-NEXT:    vpop {d10, d11}
 ; LE-I64-NEXT:    pop {r4, pc}
 ;
-; LE-I32-NEON-LABEL: lrint_v2f32:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r11, lr}
-; LE-I32-NEON-NEXT:    push {r11, lr}
-; LE-I32-NEON-NEXT:    .vsave {d8, d9}
-; LE-I32-NEON-NEXT:    vpush {d8, d9}
-; LE-I32-NEON-NEXT:    vmov.f64 d8, d0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s17
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEON-NEXT:    vorr d0, d9, d9
-; LE-I32-NEON-NEXT:    vpop {d8, d9}
-; LE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v2f32:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r4, lr}
-; LE-I64-NEON-NEXT:    push {r4, lr}
-; LE-I64-NEON-NEXT:    .vsave {d10, d11}
-; LE-I64-NEON-NEXT:    vpush {d10, d11}
-; LE-I64-NEON-NEXT:    .vsave {d8}
-; LE-I64-NEON-NEXT:    vpush {d8}
-; LE-I64-NEON-NEXT:    vmov.f64 d8, d0
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r1
-; LE-I64-NEON-NEXT:    vorr q0, q5, q5
-; LE-I64-NEON-NEXT:    vpop {d8}
-; LE-I64-NEON-NEXT:    vpop {d10, d11}
-; LE-I64-NEON-NEXT:    pop {r4, pc}
-;
 ; BE-I32-LABEL: lrint_v2f32:
 ; BE-I32:       @ %bb.0:
 ; BE-I32-NEXT:    .save {r11, lr}
@@ -235,46 +157,6 @@ define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
 ; BE-I64-NEXT:    vpop {d8}
 ; BE-I64-NEXT:    vpop {d10, d11}
 ; BE-I64-NEXT:    pop {r4, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v2f32:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r11, lr}
-; BE-I32-NEON-NEXT:    push {r11, lr}
-; BE-I32-NEON-NEXT:    .vsave {d8, d9}
-; BE-I32-NEON-NEXT:    vpush {d8, d9}
-; BE-I32-NEON-NEXT:    vrev64.32 d8, d0
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEON-NEXT:    vrev64.32 d0, d9
-; BE-I32-NEON-NEXT:    vpop {d8, d9}
-; BE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v2f32:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r4, lr}
-; BE-I64-NEON-NEXT:    push {r4, lr}
-; BE-I64-NEON-NEXT:    .vsave {d10, d11}
-; BE-I64-NEON-NEXT:    vpush {d10, d11}
-; BE-I64-NEON-NEXT:    .vsave {d8}
-; BE-I64-NEON-NEXT:    vpush {d8}
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d0
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q0, q5
-; BE-I64-NEON-NEXT:    vpop {d8}
-; BE-I64-NEON-NEXT:    vpop {d10, d11}
-; BE-I64-NEON-NEXT:    pop {r4, pc}
   %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
   ret <2 x iXLen> %a
 }
@@ -335,60 +217,6 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
 ; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
 ; LE-I64-NEXT:    pop {r4, r5, r6, pc}
 ;
-; LE-I32-NEON-LABEL: lrint_v4f32:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r11, lr}
-; LE-I32-NEON-NEXT:    push {r11, lr}
-; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11}
-; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11}
-; LE-I32-NEON-NEXT:    vorr q4, q0, q0
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s18
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s16
-; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s19
-; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s17
-; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEON-NEXT:    vorr q0, q5, q5
-; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11}
-; LE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v4f32:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r4, r5, r6, lr}
-; LE-I64-NEON-NEXT:    push {r4, r5, r6, lr}
-; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; LE-I64-NEON-NEXT:    vorr q5, q0, q0
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s23
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s20
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s21
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s22
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r6
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r4
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r5
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-I64-NEON-NEXT:    vorr q0, q6, q6
-; LE-I64-NEON-NEXT:    vorr q1, q4, q4
-; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
-; LE-I64-NEON-NEXT:    pop {r4, r5, r6, pc}
-;
 ; BE-I32-LABEL: lrint_v4f32:
 ; BE-I32:       @ %bb.0:
 ; BE-I32-NEXT:    .save {r11, lr}
@@ -443,61 +271,6 @@ define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
 ; BE-I64-NEXT:    vrev64.32 q1, q5
 ; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
 ; BE-I64-NEXT:    pop {r4, r5, r6, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v4f32:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r11, lr}
-; BE-I32-NEON-NEXT:    push {r11, lr}
-; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11}
-; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11}
-; BE-I32-NEON-NEXT:    vrev64.32 q4, q0
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s18
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s19
-; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEON-NEXT:    vrev64.32 q0, q5
-; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11}
-; BE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v4f32:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r4, r5, r6, lr}
-; BE-I64-NEON-NEXT:    push {r4, r5, r6, lr}
-; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d1
-; BE-I64-NEON-NEXT:    vrev64.32 d9, d0
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s18
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s19
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r6
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r5
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q0, q6
-; BE-I64-NEON-NEXT:    vrev64.32 q1, q5
-; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
-; BE-I64-NEON-NEXT:    pop {r4, r5, r6, pc}
   %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x)
   ret <4 x iXLen> %a
 }
@@ -607,109 +380,6 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
 ; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 ;
-; LE-I32-NEON-LABEL: lrint_v8f32:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r11, lr}
-; LE-I32-NEON-NEXT:    push {r11, lr}
-; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    vorr q5, q1, q1
-; LE-I32-NEON-NEXT:    vorr q7, q0, q0
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s20
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s22
-; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s30
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s28
-; LE-I32-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s31
-; LE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s29
-; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s23
-; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s21
-; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEON-NEXT:    vorr q0, q6, q6
-; LE-I32-NEON-NEXT:    vorr q1, q4, q4
-; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v8f32:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    .pad #40
-; LE-I64-NEON-NEXT:    sub sp, sp, #40
-; LE-I64-NEON-NEXT:    vorr q6, q1, q1
-; LE-I64-NEON-NEXT:    add lr, sp, #24
-; LE-I64-NEON-NEXT:    vorr q7, q0, q0
-; LE-I64-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s27
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s24
-; LE-I64-NEON-NEXT:    mov r8, r1
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s25
-; LE-I64-NEON-NEXT:    mov r9, r1
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vorr q6, q7, q7
-; LE-I64-NEON-NEXT:    add lr, sp, #8
-; LE-I64-NEON-NEXT:    mov r10, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s26
-; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s27
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s24
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    add lr, sp, #8
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s1
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    add lr, sp, #24
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s2
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r6
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r4
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r10
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r8
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r5
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r7
-; LE-I64-NEON-NEXT:    vorr q0, q6, q6
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r9
-; LE-I64-NEON-NEXT:    vorr q1, q7, q7
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-I64-NEON-NEXT:    vorr q2, q5, q5
-; LE-I64-NEON-NEXT:    vorr q3, q4, q4
-; LE-I64-NEON-NEXT:    add sp, sp, #40
-; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
 ; BE-I32-LABEL: lrint_v8f32:
 ; BE-I32:       @ %bb.0:
 ; BE-I32-NEXT:    .save {r11, lr}
@@ -815,112 +485,6 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
 ; BE-I64-NEXT:    add sp, sp, #32
 ; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v8f32:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r11, lr}
-; BE-I32-NEON-NEXT:    push {r11, lr}
-; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    vrev64.32 q4, q1
-; BE-I32-NEON-NEXT:    vrev64.32 q5, q0
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s20
-; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s18
-; BE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s22
-; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s19
-; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s23
-; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s21
-; BE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I32-NEON-NEXT:    vmov.32 d14[1], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.32 d12[1], r0
-; BE-I32-NEON-NEXT:    vrev64.32 q0, q7
-; BE-I32-NEON-NEXT:    vrev64.32 q1, q6
-; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v8f32:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    .pad #32
-; BE-I64-NEON-NEXT:    sub sp, sp, #32
-; BE-I64-NEON-NEXT:    vorr q4, q1, q1
-; BE-I64-NEON-NEXT:    add lr, sp, #8
-; BE-I64-NEON-NEXT:    vorr q5, q0, q0
-; BE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    vrev64.32 d12, d8
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s25
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s24
-; BE-I64-NEON-NEXT:    mov r8, r1
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vrev64.32 d0, d11
-; BE-I64-NEON-NEXT:    mov r9, r1
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d9
-; BE-I64-NEON-NEXT:    vorr d9, d0, d0
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    mov r10, r1
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    add lr, sp, #8
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s19
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d16
-; BE-I64-NEON-NEXT:    vstr d8, [sp, #8] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vldr d0, [sp, #8] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s1
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vldr d0, [sp, #24] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    @ kill: def $s0 killed $s0 killed $d0
-; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r6
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r8
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r7
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r5
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r10
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r9
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q0, q4
-; BE-I64-NEON-NEXT:    vrev64.32 q1, q5
-; BE-I64-NEON-NEXT:    vrev64.32 q2, q7
-; BE-I64-NEON-NEXT:    vrev64.32 q3, q6
-; BE-I64-NEON-NEXT:    add sp, sp, #32
-; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
   %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x)
   ret <8 x iXLen> %a
 }
@@ -1172,251 +736,6 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
 ; LE-I64-NEXT:    add sp, sp, #4
 ; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
-; LE-I32-NEON-LABEL: lrint_v16f32:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r11, lr}
-; LE-I32-NEON-NEXT:    push {r11, lr}
-; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    .pad #80
-; LE-I32-NEON-NEXT:    sub sp, sp, #80
-; LE-I32-NEON-NEXT:    vorr q5, q3, q3
-; LE-I32-NEON-NEXT:    vstmia sp, {d0, d1} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    vorr q6, q2, q2
-; LE-I32-NEON-NEXT:    vorr q7, q1, q1
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s20
-; LE-I32-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s22
-; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s24
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s26
-; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    vorr q4, q7, q7
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s16
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s18
-; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vldmia sp, {d12, d13} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s26
-; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s24
-; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s27
-; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s25
-; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s19
-; LE-I32-NEON-NEXT:    vmov.32 d14[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s17
-; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s27
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s25
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s19
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s17
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
-; LE-I32-NEON-NEXT:    vorr q0, q7, q7
-; LE-I32-NEON-NEXT:    vldmia lr, {d4, d5} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr q1, q5, q5
-; LE-I32-NEON-NEXT:    vorr q3, q6, q6
-; LE-I32-NEON-NEXT:    add sp, sp, #80
-; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v16f32:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEON-NEXT:    .pad #4
-; LE-I64-NEON-NEXT:    sub sp, sp, #4
-; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    .pad #160
-; LE-I64-NEON-NEXT:    sub sp, sp, #160
-; LE-I64-NEON-NEXT:    add lr, sp, #112
-; LE-I64-NEON-NEXT:    vorr q5, q3, q3
-; LE-I64-NEON-NEXT:    vorr q6, q0, q0
-; LE-I64-NEON-NEXT:    mov r4, r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #48
-; LE-I64-NEON-NEXT:    vorr q7, q1, q1
-; LE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s23
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s24
-; LE-I64-NEON-NEXT:    add lr, sp, #144
-; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEON-NEXT:    str r1, [sp, #108] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s25
-; LE-I64-NEON-NEXT:    str r1, [sp, #84] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s28
-; LE-I64-NEON-NEXT:    add lr, sp, #128
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s29
-; LE-I64-NEON-NEXT:    mov r9, r1
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s30
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s31
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    add lr, sp, #112
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s29
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s22
-; LE-I64-NEON-NEXT:    add lr, sp, #24
-; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEON-NEXT:    mov r11, r1
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r7
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    add lr, sp, #144
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s21
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r5
-; LE-I64-NEON-NEXT:    str r1, [sp, #40] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d16[0], r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #88
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s20
-; LE-I64-NEON-NEXT:    mov r10, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r6
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s31
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #8
-; LE-I64-NEON-NEXT:    mov r8, r1
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r9
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #64
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    add lr, sp, #128
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    mov r9, r1
-; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #48
-; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s27
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s26
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #128
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #144
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d17[1], r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #112
-; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s20
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    add lr, sp, #24
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s22
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d16[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d17[1], r11
-; LE-I64-NEON-NEXT:    vorr q6, q8, q8
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    add lr, sp, #144
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #8
-; LE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #128
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r9
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r6
-; LE-I64-NEON-NEXT:    vmov.32 d19[1], r10
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-I64-NEON-NEXT:    vmov.32 d16[1], r0
-; LE-I64-NEON-NEXT:    add r0, r4, #64
-; LE-I64-NEON-NEXT:    vmov.32 d18[1], r8
-; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r7
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #64
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r5
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r4:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #88
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]
-; LE-I64-NEON-NEXT:    add sp, sp, #160
-; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    add sp, sp, #4
-; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
 ; BE-I32-LABEL: lrint_v16f32:
 ; BE-I32:       @ %bb.0:
 ; BE-I32-NEXT:    .save {r11, lr}
@@ -1683,2433 +1002,76 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
 ; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-I64-NEXT:    add sp, sp, #4
 ; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v16f32:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r11, lr}
-; BE-I32-NEON-NEXT:    push {r11, lr}
-; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    .pad #96
-; BE-I32-NEON-NEXT:    sub sp, sp, #96
-; BE-I32-NEON-NEXT:    vrev64.32 q3, q3
-; BE-I32-NEON-NEXT:    add lr, sp, #64
-; BE-I32-NEON-NEXT:    vrev64.32 q4, q0
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s12
-; BE-I32-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #32
-; BE-I32-NEON-NEXT:    vrev64.32 q5, q1
-; BE-I32-NEON-NEXT:    vrev64.32 q7, q2
-; BE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I32-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-I32-NEON-NEXT:    add lr, sp, #80
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s18
-; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s20
-; BE-I32-NEON-NEXT:    add lr, sp, #48
-; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s22
-; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s28
-; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEON-NEXT:    vstmia sp, {d8, d9} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    add lr, sp, #64
-; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s22
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s30
-; BE-I32-NEON-NEXT:    add lr, sp, #80
-; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s23
-; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s31
-; BE-I32-NEON-NEXT:    add lr, sp, #80
-; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s29
-; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    vmov.32 d12[1], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s19
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I32-NEON-NEXT:    vldmia sp, {d10, d11} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    add lr, sp, #32
-; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s19
-; BE-I32-NEON-NEXT:    vorr q7, q5, q5
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I32-NEON-NEXT:    add lr, sp, #48
-; BE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    add lr, sp, #64
-; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s1
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    add lr, sp, #80
-; BE-I32-NEON-NEXT:    vrev64.32 q0, q5
-; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vrev64.32 q1, q7
-; BE-I32-NEON-NEXT:    vmov.32 d16[1], r0
-; BE-I32-NEON-NEXT:    vrev64.32 q2, q6
-; BE-I32-NEON-NEXT:    vrev64.32 q3, q8
-; BE-I32-NEON-NEXT:    add sp, sp, #96
-; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v16f32:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEON-NEXT:    .pad #4
-; BE-I64-NEON-NEXT:    sub sp, sp, #4
-; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    .pad #144
-; BE-I64-NEON-NEXT:    sub sp, sp, #144
-; BE-I64-NEON-NEXT:    vorr q6, q3, q3
-; BE-I64-NEON-NEXT:    add lr, sp, #112
-; BE-I64-NEON-NEXT:    vorr q7, q0, q0
-; BE-I64-NEON-NEXT:    mov r4, r0
-; BE-I64-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #96
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d13
-; BE-I64-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    str r1, [sp, #88] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d14
-; BE-I64-NEON-NEXT:    add lr, sp, #128
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    str r1, [sp, #92] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    vrev64.32 d9, d12
-; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    vstr d9, [sp, #64] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s19
-; BE-I64-NEON-NEXT:    mov r9, r1
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    str r1, [sp, #84] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    vrev64.32 d9, d15
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s18
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s19
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vldr d0, [sp, #64] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    @ kill: def $s0 killed $s0 killed $d0
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #40
-; BE-I64-NEON-NEXT:    str r1, [sp, #60] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r7
-; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #96
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d16
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r5
-; BE-I64-NEON-NEXT:    add lr, sp, #64
-; BE-I64-NEON-NEXT:    mov r10, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #24
-; BE-I64-NEON-NEXT:    mov r11, r1
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r6
-; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #96
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d17
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r9
-; BE-I64-NEON-NEXT:    add lr, sp, #96
-; BE-I64-NEON-NEXT:    mov r8, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #8
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    mov r9, r1
-; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #112
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #128
-; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d16
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #92] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #128
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    add lr, sp, #112
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #40
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d17
-; BE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #60] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    add lr, sp, #24
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    add r0, r4, #64
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #8
-; BE-I64-NEON-NEXT:    vmov.32 d17[1], r10
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r11
-; BE-I64-NEON-NEXT:    vorr q12, q8, q8
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #128
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r7
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r6
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r5
-; BE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #96
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-I64-NEON-NEXT:    vmov.32 d17[1], r8
-; BE-I64-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #64
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r9
-; BE-I64-NEON-NEXT:    vrev64.32 q14, q7
-; BE-I64-NEON-NEXT:    vorr q13, q8, q8
-; BE-I64-NEON-NEXT:    vrev64.32 q15, q5
-; BE-I64-NEON-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q6
-; BE-I64-NEON-NEXT:    vst1.64 {d28, d29}, [r0:128]!
-; BE-I64-NEON-NEXT:    vst1.64 {d30, d31}, [r0:128]!
-; BE-I64-NEON-NEXT:    vrev64.32 q9, q9
-; BE-I64-NEON-NEXT:    vrev64.32 q10, q10
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEON-NEXT:    vrev64.32 q11, q11
-; BE-I64-NEON-NEXT:    vrev64.32 q12, q12
-; BE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; BE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r4:128]!
-; BE-I64-NEON-NEXT:    vst1.64 {d22, d23}, [r4:128]!
-; BE-I64-NEON-NEXT:    vrev64.32 q13, q13
-; BE-I64-NEON-NEXT:    vst1.64 {d24, d25}, [r4:128]!
-; BE-I64-NEON-NEXT:    vst1.64 {d26, d27}, [r4:128]
-; BE-I64-NEON-NEXT:    add sp, sp, #144
-; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    add sp, sp, #4
-; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
   %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
   ret <16 x iXLen> %a
 }
 declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)
 
-define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
-; LE-I32-LABEL: lrint_v32f32:
+define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
+; LE-I32-LABEL: lrint_v1f64:
 ; LE-I32:       @ %bb.0:
-; LE-I32-NEXT:    .save {r4, r5, r6, lr}
-; LE-I32-NEXT:    push {r4, r5, r6, lr}
-; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEXT:    .pad #144
-; LE-I32-NEXT:    sub sp, sp, #144
-; LE-I32-NEXT:    add lr, sp, #48
-; LE-I32-NEXT:    mov r4, r0
-; LE-I32-NEXT:    add r0, sp, #224
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v1f64:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r11, lr}
+; LE-I64-NEXT:    push {r11, lr}
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vmov.32 d0[0], r0
+; LE-I64-NEXT:    vmov.32 d0[1], r1
+; LE-I64-NEXT:    pop {r11, pc}
+;
+; BE-I32-LABEL: lrint_v1f64:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v1f64:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r11, lr}
+; BE-I64-NEXT:    push {r11, lr}
+; BE-I64-NEXT:    bl lrint
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEXT:    pop {r11, pc}
+  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
+  ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
+
+define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
+; LE-I32-LABEL: lrint_v2f64:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10}
+; LE-I32-NEXT:    vpush {d8, d9, d10}
 ; LE-I32-NEXT:    vorr q4, q0, q0
-; LE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #16
-; LE-I32-NEXT:    vorr q6, q3, q3
-; LE-I32-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I32-NEXT:    vmov.f32 s0, s4
-; LE-I32-NEXT:    add lr, sp, #80
-; LE-I32-NEXT:    vorr q5, q1, q1
-; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEXT:    add r0, sp, #272
-; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #64
-; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEXT:    add r0, sp, #240
-; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #32
-; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEXT:    vstmia sp, {d2, d3} @ 16-byte Spill
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s18
-; LE-I32-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s22
-; LE-I32-NEXT:    add lr, sp, #112
-; LE-I32-NEXT:    vmov.32 d17[0], r0
-; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    add lr, sp, #128
-; LE-I32-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #48
-; LE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I32-NEXT:    vmov.f32 s0, s20
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s22
-; LE-I32-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEXT:    vorr q7, q5, q5
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s26
-; LE-I32-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s24
-; LE-I32-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s27
-; LE-I32-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s25
-; LE-I32-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s31
-; LE-I32-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEXT:    add lr, sp, #96
-; LE-I32-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s29
-; LE-I32-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vldmia sp, {d14, d15} @ 16-byte Reload
-; LE-I32-NEXT:    vmov.f32 s0, s31
-; LE-I32-NEXT:    add lr, sp, #48
-; LE-I32-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    add lr, sp, #128
-; LE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I32-NEXT:    add lr, sp, #16
-; LE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I32-NEXT:    vmov.f32 s0, s23
-; LE-I32-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s29
-; LE-I32-NEXT:    add lr, sp, #112
-; LE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I32-NEXT:    vmov.32 d13[1], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s20
-; LE-I32-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEXT:    add lr, sp, #128
-; LE-I32-NEXT:    add r0, sp, #256
-; LE-I32-NEXT:    vld1.64 {d14, d15}, [r0]
-; LE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s21
-; LE-I32-NEXT:    vorr q4, q6, q6
-; LE-I32-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vorr q6, q7, q7
-; LE-I32-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEXT:    add lr, sp, #112
-; LE-I32-NEXT:    vstmia sp, {d14, d15} @ 16-byte Spill
-; LE-I32-NEXT:    vmov.f32 s0, s24
-; LE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    add lr, sp, #32
-; LE-I32-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I32-NEXT:    vmov.f32 s0, s18
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s16
-; LE-I32-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s19
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vorr d0, d9, d9
 ; LE-I32-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s26
-; LE-I32-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s17
-; LE-I32-NEXT:    add lr, sp, #16
-; LE-I32-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEXT:    add lr, sp, #32
-; LE-I32-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #64
-; LE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I32-NEXT:    vmov.f32 s0, s20
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    add lr, sp, #80
-; LE-I32-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I32-NEXT:    vmov.f32 s0, s26
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s24
-; LE-I32-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s27
-; LE-I32-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s22
-; LE-I32-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s25
-; LE-I32-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s23
-; LE-I32-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s21
-; LE-I32-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vldmia sp, {d12, d13} @ 16-byte Reload
-; LE-I32-NEXT:    vmov.f32 s0, s27
-; LE-I32-NEXT:    vmov.32 d14[1], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    vmov.f32 s0, s25
-; LE-I32-NEXT:    add lr, sp, #16
-; LE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I32-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEXT:    bl lrintf
-; LE-I32-NEXT:    add lr, sp, #112
+; LE-I32-NEXT:    bl lrint
 ; LE-I32-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEXT:    mov r0, r4
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    add lr, sp, #128
-; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    add lr, sp, #48
-; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    add lr, sp, #96
-; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    add lr, sp, #32
-; LE-I32-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-I32-NEXT:    add r0, r4, #64
-; LE-I32-NEXT:    vst1.32 {d8, d9}, [r0:128]!
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEXT:    vst1.32 {d10, d11}, [r0:128]!
-; LE-I32-NEXT:    vst1.64 {d14, d15}, [r0:128]
-; LE-I32-NEXT:    add sp, sp, #144
-; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEXT:    pop {r4, r5, r6, pc}
+; LE-I32-NEXT:    vorr d0, d10, d10
+; LE-I32-NEXT:    vpop {d8, d9, d10}
+; LE-I32-NEXT:    pop {r11, pc}
 ;
-; LE-I64-LABEL: lrint_v32f32:
+; LE-I64-LABEL: lrint_v2f64:
 ; LE-I64:       @ %bb.0:
-; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEXT:    .pad #4
-; LE-I64-NEXT:    sub sp, sp, #4
-; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEXT:    .pad #184
-; LE-I64-NEXT:    sub sp, sp, #184
-; LE-I64-NEXT:    add lr, sp, #152
-; LE-I64-NEXT:    vorr q7, q3, q3
-; LE-I64-NEXT:    vorr q4, q2, q2
-; LE-I64-NEXT:    mov r5, r0
-; LE-I64-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #88
-; LE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I64-NEXT:    vmov.f32 s0, s3
-; LE-I64-NEXT:    str r0, [sp, #68] @ 4-byte Spill
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s18
-; LE-I64-NEXT:    add lr, sp, #168
-; LE-I64-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s16
-; LE-I64-NEXT:    mov r8, r1
-; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s17
-; LE-I64-NEXT:    mov r6, r1
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s19
-; LE-I64-NEXT:    mov r4, r1
-; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s31
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s30
-; LE-I64-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; LE-I64-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEXT:    vmov.32 d11[1], r7
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s29
-; LE-I64-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEXT:    add lr, sp, #104
-; LE-I64-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; LE-I64-NEXT:    vmov.32 d13[1], r4
-; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEXT:    add r0, sp, #320
-; LE-I64-NEXT:    add lr, sp, #120
-; LE-I64-NEXT:    mov r11, r1
-; LE-I64-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-I64-NEXT:    add r0, sp, #304
-; LE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #72
-; LE-I64-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-I64-NEXT:    add r0, sp, #336
-; LE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #32
-; LE-I64-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-I64-NEXT:    add r0, sp, #288
-; LE-I64-NEXT:    vmov.32 d12[1], r6
-; LE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #48
-; LE-I64-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-I64-NEXT:    vmov.32 d10[1], r8
-; LE-I64-NEXT:    add r8, r5, #64
-; LE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #152
-; LE-I64-NEXT:    vst1.64 {d12, d13}, [r8:128]!
-; LE-I64-NEXT:    vst1.64 {d10, d11}, [r8:128]!
-; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.f32 s0, s27
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s28
-; LE-I64-NEXT:    mov r4, r1
-; LE-I64-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s26
-; LE-I64-NEXT:    mov r9, r1
-; LE-I64-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEXT:    vmov.32 d11[1], r4
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    add lr, sp, #136
-; LE-I64-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
-; LE-I64-NEXT:    mov r10, r1
-; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #168
-; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #88
-; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.f32 s0, s26
-; LE-I64-NEXT:    vmov.32 d11[1], r0
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s25
-; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    add lr, sp, #168
-; LE-I64-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #104
-; LE-I64-NEXT:    vorr q5, q6, q6
-; LE-I64-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.32 d15[1], r0
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s20
-; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; LE-I64-NEXT:    add lr, sp, #104
-; LE-I64-NEXT:    mov r4, r1
-; LE-I64-NEXT:    vmov.32 d14[1], r0
-; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    add lr, sp, #152
-; LE-I64-NEXT:    mov r6, r1
-; LE-I64-NEXT:    vorr q7, q6, q6
-; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.32 d9[1], r11
-; LE-I64-NEXT:    vmov.f32 s0, s25
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s24
-; LE-I64-NEXT:    mov r5, r1
-; LE-I64-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEXT:    vmov.32 d8[1], r9
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    add lr, sp, #136
-; LE-I64-NEXT:    mov r11, r1
-; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.32 d16[1], r10
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #120
-; LE-I64-NEXT:    vst1.64 {d8, d9}, [r8:128]!
-; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.f32 s0, s1
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    add lr, sp, #152
-; LE-I64-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEXT:    mov r10, r1
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #104
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #72
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r8:128]
-; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.f32 s0, s19
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    add lr, sp, #168
-; LE-I64-NEXT:    vmov.f32 s0, s18
-; LE-I64-NEXT:    mov r8, r1
-; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.32 d16[1], r7
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s17
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    add lr, sp, #104
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    vmov.32 d15[1], r4
-; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s16
-; LE-I64-NEXT:    mov r9, r1
-; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    vmov.32 d14[1], r6
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    add lr, sp, #88
-; LE-I64-NEXT:    mov r6, r1
-; LE-I64-NEXT:    vmov.32 d11[1], r5
-; LE-I64-NEXT:    vmov.32 d10[1], r11
-; LE-I64-NEXT:    ldr r11, [sp, #68] @ 4-byte Reload
-; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #16
-; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #32
-; LE-I64-NEXT:    vst1.64 {d14, d15}, [r11:128]!
-; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.f32 s0, s23
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    add lr, sp, #152
-; LE-I64-NEXT:    mov r4, r1
-; LE-I64-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #120
-; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEXT:    @ kill: def $s0 killed $s0 killed $q0
-; LE-I64-NEXT:    vmov.32 d13[1], r10
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s22
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    add lr, sp, #152
-; LE-I64-NEXT:    mov r5, r1
-; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #104
-; LE-I64-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.32 d15[1], r8
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s21
-; LE-I64-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEXT:    add lr, sp, #72
-; LE-I64-NEXT:    mov r8, r1
-; LE-I64-NEXT:    vmov.32 d14[1], r7
-; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #104
-; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s20
-; LE-I64-NEXT:    add lr, sp, #88
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.32 d13[1], r9
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEXT:    add lr, sp, #32
-; LE-I64-NEXT:    mov r9, r1
-; LE-I64-NEXT:    vmov.32 d12[1], r6
-; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #88
-; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #120
-; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.f32 s0, s19
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s18
-; LE-I64-NEXT:    add lr, sp, #72
-; LE-I64-NEXT:    mov r10, r1
-; LE-I64-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.32 d13[1], r4
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    add lr, sp, #152
-; LE-I64-NEXT:    mov r4, r1
-; LE-I64-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.32 d16[1], r5
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #168
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #48
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r11:128]!
-; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.f32 s0, s21
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s20
-; LE-I64-NEXT:    vmov.32 d12[1], r8
-; LE-I64-NEXT:    add lr, sp, #72
-; LE-I64-NEXT:    mov r5, r1
-; LE-I64-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    vmov.f32 s0, s23
-; LE-I64-NEXT:    add lr, sp, #32
-; LE-I64-NEXT:    mov r6, r1
-; LE-I64-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.32 d13[1], r7
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    add lr, sp, #48
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.f32 s0, s2
-; LE-I64-NEXT:    vmov.32 d12[1], r9
-; LE-I64-NEXT:    bl lrintf
-; LE-I64-NEXT:    add lr, sp, #16
-; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #136
-; LE-I64-NEXT:    vmov.32 d11[1], r7
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r11:128]!
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #152
-; LE-I64-NEXT:    vmov.32 d15[1], r10
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r11:128]
-; LE-I64-NEXT:    vmov.32 d10[1], r1
-; LE-I64-NEXT:    ldr r1, [sp, #68] @ 4-byte Reload
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add r0, r1, #192
-; LE-I64-NEXT:    add lr, sp, #72
-; LE-I64-NEXT:    vmov.32 d14[1], r4
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-I64-NEXT:    vst1.64 {d14, d15}, [r0:128]!
-; LE-I64-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-I64-NEXT:    vmov.32 d9[1], r5
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #88
-; LE-I64-NEXT:    vmov.32 d8[1], r6
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-I64-NEXT:    add r0, r1, #128
-; LE-I64-NEXT:    vst1.64 {d8, d9}, [r0:128]!
-; LE-I64-NEXT:    vst1.64 {d10, d11}, [r0:128]!
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #104
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-I64-NEXT:    add sp, sp, #184
-; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEXT:    add sp, sp, #4
-; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; LE-I32-NEON-LABEL: lrint_v32f32:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r4, r5, r6, lr}
-; LE-I32-NEON-NEXT:    push {r4, r5, r6, lr}
-; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    .pad #144
-; LE-I32-NEON-NEXT:    sub sp, sp, #144
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    mov r4, r0
-; LE-I32-NEON-NEXT:    add r0, sp, #224
-; LE-I32-NEON-NEXT:    vorr q4, q0, q0
-; LE-I32-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    vorr q6, q3, q3
-; LE-I32-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s4
-; LE-I32-NEON-NEXT:    add lr, sp, #80
-; LE-I32-NEON-NEXT:    vorr q5, q1, q1
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    add r0, sp, #272
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    add r0, sp, #240
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    vstmia sp, {d2, d3} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s18
-; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s22
-; LE-I32-NEON-NEXT:    add lr, sp, #112
-; LE-I32-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    add lr, sp, #128
-; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s20
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s22
-; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEON-NEXT:    vorr q7, q5, q5
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s26
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s24
-; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s27
-; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s25
-; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s31
-; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEON-NEXT:    add lr, sp, #96
-; LE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s29
-; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vldmia sp, {d14, d15} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s31
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    add lr, sp, #128
-; LE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s23
-; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s29
-; LE-I32-NEON-NEXT:    add lr, sp, #112
-; LE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s20
-; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEON-NEXT:    add lr, sp, #128
-; LE-I32-NEON-NEXT:    add r0, sp, #256
-; LE-I32-NEON-NEXT:    vld1.64 {d14, d15}, [r0]
-; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s21
-; LE-I32-NEON-NEXT:    vorr q4, q6, q6
-; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vorr q6, q7, q7
-; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEON-NEXT:    add lr, sp, #112
-; LE-I32-NEON-NEXT:    vstmia sp, {d14, d15} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s24
-; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s18
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s16
-; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s19
-; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s26
-; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s17
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s20
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    add lr, sp, #80
-; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s26
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s24
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s27
-; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s22
-; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s25
-; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s23
-; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s21
-; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vldmia sp, {d12, d13} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s27
-; LE-I32-NEON-NEXT:    vmov.32 d14[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    vmov.f32 s0, s25
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEON-NEXT:    bl lrintf
-; LE-I32-NEON-NEXT:    add lr, sp, #112
-; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEON-NEXT:    mov r0, r4
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    add lr, sp, #128
-; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    add lr, sp, #96
-; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-I32-NEON-NEXT:    add r0, r4, #64
-; LE-I32-NEON-NEXT:    vst1.32 {d8, d9}, [r0:128]!
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEON-NEXT:    vst1.32 {d10, d11}, [r0:128]!
-; LE-I32-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]
-; LE-I32-NEON-NEXT:    add sp, sp, #144
-; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    pop {r4, r5, r6, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v32f32:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEON-NEXT:    .pad #4
-; LE-I64-NEON-NEXT:    sub sp, sp, #4
-; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    .pad #184
-; LE-I64-NEON-NEXT:    sub sp, sp, #184
-; LE-I64-NEON-NEXT:    add lr, sp, #152
-; LE-I64-NEON-NEXT:    vorr q7, q3, q3
-; LE-I64-NEON-NEXT:    vorr q4, q2, q2
-; LE-I64-NEON-NEXT:    mov r5, r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #88
-; LE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s3
-; LE-I64-NEON-NEXT:    str r0, [sp, #68] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s18
-; LE-I64-NEON-NEXT:    add lr, sp, #168
-; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEON-NEXT:    str r1, [sp, #16] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; LE-I64-NEON-NEXT:    mov r8, r1
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s19
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s31
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s30
-; LE-I64-NEON-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r7
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s29
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #104
-; LE-I64-NEON-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r4
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    add r0, sp, #320
-; LE-I64-NEON-NEXT:    add lr, sp, #120
-; LE-I64-NEON-NEXT:    mov r11, r1
-; LE-I64-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-I64-NEON-NEXT:    add r0, sp, #304
-; LE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #72
-; LE-I64-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-I64-NEON-NEXT:    add r0, sp, #336
-; LE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #32
-; LE-I64-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-I64-NEON-NEXT:    add r0, sp, #288
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r6
-; LE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #48
-; LE-I64-NEON-NEXT:    vld1.64 {d0, d1}, [r0]
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r8
-; LE-I64-NEON-NEXT:    add r8, r5, #64
-; LE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #152
-; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r8:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d10, d11}, [r8:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s27
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s28
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s26
-; LE-I64-NEON-NEXT:    mov r9, r1
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #136
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    mov r10, r1
-; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #168
-; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #88
-; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s26
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s25
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #168
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #104
-; LE-I64-NEON-NEXT:    vorr q5, q6, q6
-; LE-I64-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r0
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s20
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #104
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #152
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vorr q7, q6, q6
-; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r11
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s25
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s24
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r9
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    add lr, sp, #136
-; LE-I64-NEON-NEXT:    mov r11, r1
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d16[1], r10
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #120
-; LE-I64-NEON-NEXT:    vst1.64 {d8, d9}, [r8:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s1
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    add lr, sp, #152
-; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEON-NEXT:    mov r10, r1
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #104
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #72
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s19
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    add lr, sp, #168
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s18
-; LE-I64-NEON-NEXT:    mov r8, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d16[1], r7
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #104
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r4
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; LE-I64-NEON-NEXT:    mov r9, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r6
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #88
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r5
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r11
-; LE-I64-NEON-NEXT:    ldr r11, [sp, #68] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #16
-; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #32
-; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r11:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s23
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    add lr, sp, #152
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #120
-; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    @ kill: def $s0 killed $s0 killed $q0
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r10
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s22
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #152
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #104
-; LE-I64-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r8
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s21
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #72
-; LE-I64-NEON-NEXT:    mov r8, r1
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r7
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #104
-; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s20
-; LE-I64-NEON-NEXT:    add lr, sp, #88
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r9
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #32
-; LE-I64-NEON-NEXT:    mov r9, r1
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r6
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #88
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #120
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s19
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s18
-; LE-I64-NEON-NEXT:    add lr, sp, #72
-; LE-I64-NEON-NEXT:    mov r10, r1
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r4
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    add lr, sp, #152
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d16[1], r5
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #168
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #48
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s21
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s20
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r8
-; LE-I64-NEON-NEXT:    add lr, sp, #72
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s23
-; LE-I64-NEON-NEXT:    add lr, sp, #32
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r7
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    add lr, sp, #48
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.f32 s0, s2
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r9
-; LE-I64-NEON-NEXT:    bl lrintf
-; LE-I64-NEON-NEXT:    add lr, sp, #16
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #136
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r7
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #152
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r10
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r1
-; LE-I64-NEON-NEXT:    ldr r1, [sp, #68] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add r0, r1, #192
-; LE-I64-NEON-NEXT:    add lr, sp, #72
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r4
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r5
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #88
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r6
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-I64-NEON-NEXT:    add r0, r1, #128
-; LE-I64-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #104
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-I64-NEON-NEXT:    add sp, sp, #184
-; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    add sp, sp, #4
-; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-I32-LABEL: lrint_v32f32:
-; BE-I32:       @ %bb.0:
-; BE-I32-NEXT:    .save {r4, r5, r6, lr}
-; BE-I32-NEXT:    push {r4, r5, r6, lr}
-; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEXT:    .pad #144
-; BE-I32-NEXT:    sub sp, sp, #144
-; BE-I32-NEXT:    mov r4, r0
-; BE-I32-NEXT:    add r0, sp, #256
-; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEXT:    add lr, sp, #16
-; BE-I32-NEXT:    add r0, sp, #272
-; BE-I32-NEXT:    vrev64.32 q4, q3
-; BE-I32-NEXT:    vrev64.32 q7, q1
-; BE-I32-NEXT:    vrev64.32 q8, q8
-; BE-I32-NEXT:    vld1.64 {d18, d19}, [r0]
-; BE-I32-NEXT:    add r0, sp, #224
-; BE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #80
-; BE-I32-NEXT:    vrev64.32 q5, q0
-; BE-I32-NEXT:    vmov.f32 s0, s28
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #64
-; BE-I32-NEXT:    vrev64.32 q8, q9
-; BE-I32-NEXT:    vld1.64 {d20, d21}, [r0]
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #32
-; BE-I32-NEXT:    vrev64.32 q8, q10
-; BE-I32-NEXT:    vrev64.32 q6, q2
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    vstmia sp, {d14, d15} @ 16-byte Spill
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s18
-; BE-I32-NEXT:    vmov.32 d16[0], r0
-; BE-I32-NEXT:    add lr, sp, #128
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s20
-; BE-I32-NEXT:    add lr, sp, #112
-; BE-I32-NEXT:    vmov.32 d17[0], r0
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #48
-; BE-I32-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s22
-; BE-I32-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s30
-; BE-I32-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s26
-; BE-I32-NEXT:    add lr, sp, #128
-; BE-I32-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; BE-I32-NEXT:    vmov.32 d15[0], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s24
-; BE-I32-NEXT:    vmov.32 d11[0], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s27
-; BE-I32-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s25
-; BE-I32-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vldmia sp, {d12, d13} @ 16-byte Reload
-; BE-I32-NEXT:    vmov.f32 s0, s27
-; BE-I32-NEXT:    add lr, sp, #96
-; BE-I32-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    add lr, sp, #48
-; BE-I32-NEXT:    vmov.32 d15[1], r0
-; BE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I32-NEXT:    vmov.f32 s0, s23
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s21
-; BE-I32-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEXT:    add lr, sp, #48
-; BE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #16
-; BE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I32-NEXT:    vmov.f32 s0, s23
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s25
-; BE-I32-NEXT:    add lr, sp, #112
-; BE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I32-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s20
-; BE-I32-NEXT:    vmov.32 d14[1], r0
-; BE-I32-NEXT:    add r0, sp, #240
-; BE-I32-NEXT:    add lr, sp, #128
-; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I32-NEXT:    vrev64.32 q6, q8
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s21
-; BE-I32-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s24
-; BE-I32-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEXT:    add lr, sp, #112
-; BE-I32-NEXT:    vorr q7, q6, q6
-; BE-I32-NEXT:    vstmia sp, {d12, d13} @ 16-byte Spill
-; BE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    add lr, sp, #32
-; BE-I32-NEXT:    vmov.32 d12[0], r0
-; BE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I32-NEXT:    vmov.f32 s0, s18
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s16
-; BE-I32-NEXT:    vmov.32 d11[0], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s19
-; BE-I32-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s30
-; BE-I32-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s17
-; BE-I32-NEXT:    add lr, sp, #16
-; BE-I32-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEXT:    add lr, sp, #32
-; BE-I32-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #64
-; BE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I32-NEXT:    vmov.f32 s0, s20
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    add lr, sp, #80
-; BE-I32-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I32-NEXT:    vmov.f32 s0, s26
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s24
-; BE-I32-NEXT:    vmov.32 d15[0], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s27
-; BE-I32-NEXT:    vmov.32 d14[0], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s22
-; BE-I32-NEXT:    vmov.32 d15[1], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s25
-; BE-I32-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s23
-; BE-I32-NEXT:    vmov.32 d14[1], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vmov.f32 s0, s21
-; BE-I32-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    vldmia sp, {d12, d13} @ 16-byte Reload
-; BE-I32-NEXT:    vmov.f32 s0, s27
-; BE-I32-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    add lr, sp, #16
-; BE-I32-NEXT:    vmov.f32 s0, s25
-; BE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I32-NEXT:    add lr, sp, #112
-; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEXT:    vrev64.32 q8, q8
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    bl lrintf
-; BE-I32-NEXT:    add lr, sp, #48
-; BE-I32-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEXT:    mov r0, r4
-; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEXT:    add lr, sp, #128
-; BE-I32-NEXT:    vrev64.32 q8, q4
-; BE-I32-NEXT:    vst1.32 {d18, d19}, [r0:128]!
-; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEXT:    add lr, sp, #96
-; BE-I32-NEXT:    vst1.32 {d18, d19}, [r0:128]!
-; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEXT:    add lr, sp, #112
-; BE-I32-NEXT:    vst1.32 {d18, d19}, [r0:128]!
-; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEXT:    add lr, sp, #32
-; BE-I32-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; BE-I32-NEXT:    add r0, r4, #64
-; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEXT:    vst1.32 {d18, d19}, [r0:128]!
-; BE-I32-NEXT:    vst1.32 {d10, d11}, [r0:128]!
-; BE-I32-NEXT:    vst1.32 {d14, d15}, [r0:128]!
-; BE-I32-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-I32-NEXT:    add sp, sp, #144
-; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEXT:    pop {r4, r5, r6, pc}
-;
-; BE-I64-LABEL: lrint_v32f32:
-; BE-I64:       @ %bb.0:
-; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEXT:    .pad #4
-; BE-I64-NEXT:    sub sp, sp, #4
-; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEXT:    .pad #256
-; BE-I64-NEXT:    sub sp, sp, #256
-; BE-I64-NEXT:    add lr, sp, #208
-; BE-I64-NEXT:    str r0, [sp, #156] @ 4-byte Spill
-; BE-I64-NEXT:    add r0, sp, #408
-; BE-I64-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #120
-; BE-I64-NEXT:    vld1.64 {d10, d11}, [r0]
-; BE-I64-NEXT:    add r0, sp, #392
-; BE-I64-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #160
-; BE-I64-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #176
-; BE-I64-NEXT:    vrev64.32 d8, d10
-; BE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #136
-; BE-I64-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEXT:    vld1.64 {d12, d13}, [r0]
-; BE-I64-NEXT:    add r0, sp, #360
-; BE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #192
-; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEXT:    add r0, sp, #376
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #40
-; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEXT:    str r1, [sp, #88] @ 4-byte Spill
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vrev64.32 d9, d11
-; BE-I64-NEXT:    add lr, sp, #240
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    str r1, [sp, #104] @ 4-byte Spill
-; BE-I64-NEXT:    vmov.f32 s0, s18
-; BE-I64-NEXT:    vrev64.32 d8, d13
-; BE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEXT:    mov r5, r1
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s19
-; BE-I64-NEXT:    add lr, sp, #192
-; BE-I64-NEXT:    str r1, [sp, #72] @ 4-byte Spill
-; BE-I64-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    vrev64.32 d10, d16
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s20
-; BE-I64-NEXT:    add lr, sp, #224
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s21
-; BE-I64-NEXT:    mov r7, r1
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEXT:    mov r6, r1
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEXT:    add lr, sp, #24
-; BE-I64-NEXT:    mov r9, r1
-; BE-I64-NEXT:    vmov.32 d15[1], r6
-; BE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #192
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    vrev64.32 d8, d17
-; BE-I64-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEXT:    vmov.32 d14[1], r7
-; BE-I64-NEXT:    add lr, sp, #56
-; BE-I64-NEXT:    mov r10, r1
-; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEXT:    add lr, sp, #192
-; BE-I64-NEXT:    mov r11, r1
-; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #40
-; BE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #224
-; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEXT:    vrev64.32 d8, d12
-; BE-I64-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEXT:    vmov.32 d10[1], r5
-; BE-I64-NEXT:    add lr, sp, #224
-; BE-I64-NEXT:    mov r8, r1
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vrev64.32 d8, d13
-; BE-I64-NEXT:    add lr, sp, #8
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
-; BE-I64-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEXT:    mov r5, r1
-; BE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #240
-; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEXT:    vmov.32 d11[1], r0
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
-; BE-I64-NEXT:    add lr, sp, #240
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d10[1], r0
-; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    add lr, sp, #136
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
-; BE-I64-NEXT:    mov r6, r1
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #24
-; BE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I64-NEXT:    vrev64.32 d8, d16
-; BE-I64-NEXT:    vmov.32 d13[1], r0
-; BE-I64-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEXT:    mov r7, r1
-; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    vmov.32 d12[1], r9
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    add lr, sp, #192
-; BE-I64-NEXT:    vmov.32 d15[1], r4
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #8
-; BE-I64-NEXT:    vmov.32 d17[1], r10
-; BE-I64-NEXT:    vmov.32 d16[1], r11
-; BE-I64-NEXT:    vorr q9, q8, q8
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #192
-; BE-I64-NEXT:    vmov.32 d17[1], r8
-; BE-I64-NEXT:    vmov.32 d16[1], r5
-; BE-I64-NEXT:    vorr q10, q8, q8
-; BE-I64-NEXT:    vrev64.32 q8, q6
-; BE-I64-NEXT:    vmov.32 d14[1], r6
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #240
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEXT:    vrev64.32 q8, q8
-; BE-I64-NEXT:    vmov.32 d11[1], r7
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #224
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    vmov.32 d10[1], r1
-; BE-I64-NEXT:    vrev64.32 q8, q8
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #56
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #136
-; BE-I64-NEXT:    vrev64.32 q8, q8
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #104
-; BE-I64-NEXT:    vrev64.32 q8, q9
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #88
-; BE-I64-NEXT:    vrev64.32 q8, q10
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #72
-; BE-I64-NEXT:    vrev64.32 q8, q7
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #208
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #56
-; BE-I64-NEXT:    vrev64.32 d8, d17
-; BE-I64-NEXT:    vrev64.32 q8, q5
-; BE-I64-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    add lr, sp, #120
-; BE-I64-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEXT:    vmov.32 d13[1], r4
-; BE-I64-NEXT:    vrev64.32 d8, d10
-; BE-I64-NEXT:    vmov.32 d12[1], r1
-; BE-I64-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEXT:    vrev64.32 q6, q6
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    vmov.32 d15[1], r1
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    ldr r6, [sp, #156] @ 4-byte Reload
-; BE-I64-NEXT:    vrev64.32 d8, d11
-; BE-I64-NEXT:    add r5, r6, #64
-; BE-I64-NEXT:    vmov.32 d14[1], r1
-; BE-I64-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEXT:    vrev64.32 q8, q7
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    vmov.32 d15[1], r1
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    add lr, sp, #208
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I64-NEXT:    vmov.32 d14[1], r1
-; BE-I64-NEXT:    vrev64.32 d8, d18
-; BE-I64-NEXT:    vrev64.32 q8, q7
-; BE-I64-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    add lr, sp, #160
-; BE-I64-NEXT:    vmov.32 d15[1], r4
-; BE-I64-NEXT:    vmov.32 d14[1], r1
-; BE-I64-NEXT:    vrev64.32 q8, q7
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEXT:    vrev64.32 d8, d11
-; BE-I64-NEXT:    vst1.64 {d12, d13}, [r5:128]
-; BE-I64-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEXT:    add lr, sp, #208
-; BE-I64-NEXT:    vmov.32 d13[1], r4
-; BE-I64-NEXT:    vmov.32 d12[1], r1
-; BE-I64-NEXT:    vrev64.32 q8, q6
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #176
-; BE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I64-NEXT:    vrev64.32 d8, d12
-; BE-I64-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    vmov.32 d15[1], r1
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    mov r5, r6
-; BE-I64-NEXT:    vrev64.32 d8, d13
-; BE-I64-NEXT:    vmov.32 d14[1], r1
-; BE-I64-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEXT:    vrev64.32 q8, q7
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    vmov.32 d15[1], r1
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    vrev64.32 d8, d10
-; BE-I64-NEXT:    vmov.32 d14[1], r1
-; BE-I64-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEXT:    vrev64.32 q8, q7
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    bl lrintf
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    add lr, sp, #208
-; BE-I64-NEXT:    add r0, r6, #192
-; BE-I64-NEXT:    vmov.32 d15[1], r4
-; BE-I64-NEXT:    vmov.32 d14[1], r1
-; BE-I64-NEXT:    vrev64.32 q8, q7
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #56
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #192
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #240
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #224
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #136
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-I64-NEXT:    add r0, r6, #128
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #104
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #88
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #72
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-I64-NEXT:    add sp, sp, #256
-; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEXT:    add sp, sp, #4
-; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v32f32:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r4, r5, r6, lr}
-; BE-I32-NEON-NEXT:    push {r4, r5, r6, lr}
-; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    .pad #144
-; BE-I32-NEON-NEXT:    sub sp, sp, #144
-; BE-I32-NEON-NEXT:    mov r4, r0
-; BE-I32-NEON-NEXT:    add r0, sp, #256
-; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    add r0, sp, #272
-; BE-I32-NEON-NEXT:    vrev64.32 q4, q3
-; BE-I32-NEON-NEXT:    vrev64.32 q7, q1
-; BE-I32-NEON-NEXT:    vrev64.32 q8, q8
-; BE-I32-NEON-NEXT:    vld1.64 {d18, d19}, [r0]
-; BE-I32-NEON-NEXT:    add r0, sp, #224
-; BE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #80
-; BE-I32-NEON-NEXT:    vrev64.32 q5, q0
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s28
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #64
-; BE-I32-NEON-NEXT:    vrev64.32 q8, q9
-; BE-I32-NEON-NEXT:    vld1.64 {d20, d21}, [r0]
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #32
-; BE-I32-NEON-NEXT:    vrev64.32 q8, q10
-; BE-I32-NEON-NEXT:    vrev64.32 q6, q2
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    vstmia sp, {d14, d15} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s18
-; BE-I32-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-I32-NEON-NEXT:    add lr, sp, #128
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s20
-; BE-I32-NEON-NEXT:    add lr, sp, #112
-; BE-I32-NEON-NEXT:    vmov.32 d17[0], r0
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #48
-; BE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s22
-; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s30
-; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s26
-; BE-I32-NEON-NEXT:    add lr, sp, #128
-; BE-I32-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s24
-; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s27
-; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s25
-; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vldmia sp, {d12, d13} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s27
-; BE-I32-NEON-NEXT:    add lr, sp, #96
-; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    add lr, sp, #48
-; BE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s23
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s21
-; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEON-NEXT:    add lr, sp, #48
-; BE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s23
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s25
-; BE-I32-NEON-NEXT:    add lr, sp, #112
-; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s20
-; BE-I32-NEON-NEXT:    vmov.32 d14[1], r0
-; BE-I32-NEON-NEXT:    add r0, sp, #240
-; BE-I32-NEON-NEXT:    add lr, sp, #128
-; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    vrev64.32 q6, q8
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s21
-; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s24
-; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEON-NEXT:    add lr, sp, #112
-; BE-I32-NEON-NEXT:    vorr q7, q6, q6
-; BE-I32-NEON-NEXT:    vstmia sp, {d12, d13} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    add lr, sp, #32
-; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s18
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s19
-; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s30
-; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEON-NEXT:    add lr, sp, #32
-; BE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #64
-; BE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s20
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    add lr, sp, #80
-; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s26
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s24
-; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s27
-; BE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s22
-; BE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s25
-; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s23
-; BE-I32-NEON-NEXT:    vmov.32 d14[1], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s21
-; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    vldmia sp, {d12, d13} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s27
-; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    vmov.f32 s0, s25
-; BE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    add lr, sp, #112
-; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEON-NEXT:    vrev64.32 q8, q8
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintf
-; BE-I32-NEON-NEXT:    add lr, sp, #48
-; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEON-NEXT:    mov r0, r4
-; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    add lr, sp, #128
-; BE-I32-NEON-NEXT:    vrev64.32 q8, q4
-; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r0:128]!
-; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    add lr, sp, #96
-; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r0:128]!
-; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    add lr, sp, #112
-; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r0:128]!
-; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    add lr, sp, #32
-; BE-I32-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; BE-I32-NEON-NEXT:    add r0, r4, #64
-; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r0:128]!
-; BE-I32-NEON-NEXT:    vst1.32 {d10, d11}, [r0:128]!
-; BE-I32-NEON-NEXT:    vst1.32 {d14, d15}, [r0:128]!
-; BE-I32-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-I32-NEON-NEXT:    add sp, sp, #144
-; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    pop {r4, r5, r6, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v32f32:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEON-NEXT:    .pad #4
-; BE-I64-NEON-NEXT:    sub sp, sp, #4
-; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    .pad #256
-; BE-I64-NEON-NEXT:    sub sp, sp, #256
-; BE-I64-NEON-NEXT:    add lr, sp, #208
-; BE-I64-NEON-NEXT:    str r0, [sp, #156] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    add r0, sp, #408
-; BE-I64-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #120
-; BE-I64-NEON-NEXT:    vld1.64 {d10, d11}, [r0]
-; BE-I64-NEON-NEXT:    add r0, sp, #392
-; BE-I64-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #160
-; BE-I64-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #176
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d10
-; BE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #136
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    vld1.64 {d12, d13}, [r0]
-; BE-I64-NEON-NEXT:    add r0, sp, #360
-; BE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #192
-; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEON-NEXT:    add r0, sp, #376
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #40
-; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    str r1, [sp, #88] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vrev64.32 d9, d11
-; BE-I64-NEON-NEXT:    add lr, sp, #240
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    str r1, [sp, #104] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s18
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d13
-; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s19
-; BE-I64-NEON-NEXT:    add lr, sp, #192
-; BE-I64-NEON-NEXT:    str r1, [sp, #72] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d10, d16
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s20
-; BE-I64-NEON-NEXT:    add lr, sp, #224
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s21
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #24
-; BE-I64-NEON-NEXT:    mov r9, r1
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r6
-; BE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #192
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d17
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r7
-; BE-I64-NEON-NEXT:    add lr, sp, #56
-; BE-I64-NEON-NEXT:    mov r10, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #192
-; BE-I64-NEON-NEXT:    mov r11, r1
-; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #40
-; BE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #224
-; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d12
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r5
-; BE-I64-NEON-NEXT:    add lr, sp, #224
-; BE-I64-NEON-NEXT:    mov r8, r1
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d13
-; BE-I64-NEON-NEXT:    add lr, sp, #8
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #240
-; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #240
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    add lr, sp, #136
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #24
-; BE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d16
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r0
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r9
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    add lr, sp, #192
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r4
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #8
-; BE-I64-NEON-NEXT:    vmov.32 d17[1], r10
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r11
-; BE-I64-NEON-NEXT:    vorr q9, q8, q8
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #192
-; BE-I64-NEON-NEXT:    vmov.32 d17[1], r8
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r5
-; BE-I64-NEON-NEXT:    vorr q10, q8, q8
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q6
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r6
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #240
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q8
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r7
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #224
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q8
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #56
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #136
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q8
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #104
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q9
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #88
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q10
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #72
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q7
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #208
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #56
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d17
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q5
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    add lr, sp, #120
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r4
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d10
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r1
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    vrev64.32 q6, q6
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r1
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    ldr r6, [sp, #156] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d11
-; BE-I64-NEON-NEXT:    add r5, r6, #64
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r1
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q7
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r1
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    add lr, sp, #208
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d18
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q7
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #160
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q7
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d11
-; BE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r5:128]
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #208
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q6
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #176
-; BE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d12
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r1
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    mov r5, r6
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d13
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r1
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q7
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r1
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    vrev64.32 d8, d10
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r1
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s17
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q7
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.f32 s0, s16
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    bl lrintf
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #208
-; BE-I64-NEON-NEXT:    add r0, r6, #192
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q7
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #56
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #192
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #240
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #224
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #136
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-I64-NEON-NEXT:    add r0, r6, #128
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #104
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #88
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #72
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-I64-NEON-NEXT:    add sp, sp, #256
-; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    add sp, sp, #4
-; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-  %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float> %x)
-  ret <32 x iXLen> %a
-}
-declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f32(<32 x float>)
-
-define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
-; LE-I32-LABEL: lrint_v1f64:
-; LE-I32:       @ %bb.0:
-; LE-I32-NEXT:    .save {r11, lr}
-; LE-I32-NEXT:    push {r11, lr}
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    pop {r11, pc}
-;
-; LE-I64-LABEL: lrint_v1f64:
-; LE-I64:       @ %bb.0:
-; LE-I64-NEXT:    .save {r11, lr}
-; LE-I64-NEXT:    push {r11, lr}
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vmov.32 d0[0], r0
-; LE-I64-NEXT:    vmov.32 d0[1], r1
-; LE-I64-NEXT:    pop {r11, pc}
-;
-; LE-I32-NEON-LABEL: lrint_v1f64:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r11, lr}
-; LE-I32-NEON-NEXT:    push {r11, lr}
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v1f64:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r11, lr}
-; LE-I64-NEON-NEXT:    push {r11, lr}
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d0[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d0[1], r1
-; LE-I64-NEON-NEXT:    pop {r11, pc}
-;
-; BE-I32-LABEL: lrint_v1f64:
-; BE-I32:       @ %bb.0:
-; BE-I32-NEXT:    .save {r11, lr}
-; BE-I32-NEXT:    push {r11, lr}
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    pop {r11, pc}
-;
-; BE-I64-LABEL: lrint_v1f64:
-; BE-I64:       @ %bb.0:
-; BE-I64-NEXT:    .save {r11, lr}
-; BE-I64-NEXT:    push {r11, lr}
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEXT:    vmov.32 d16[1], r1
-; BE-I64-NEXT:    vrev64.32 d0, d16
-; BE-I64-NEXT:    pop {r11, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v1f64:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r11, lr}
-; BE-I32-NEON-NEXT:    push {r11, lr}
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v1f64:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r11, lr}
-; BE-I64-NEON-NEXT:    push {r11, lr}
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 d0, d16
-; BE-I64-NEON-NEXT:    pop {r11, pc}
-  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
-  ret <1 x iXLen> %a
-}
-declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
-
-define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
-; LE-I32-LABEL: lrint_v2f64:
-; LE-I32:       @ %bb.0:
-; LE-I32-NEXT:    .save {r11, lr}
-; LE-I32-NEXT:    push {r11, lr}
-; LE-I32-NEXT:    .vsave {d8, d9, d10}
-; LE-I32-NEXT:    vpush {d8, d9, d10}
-; LE-I32-NEXT:    vorr q4, q0, q0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d9, d9
-; LE-I32-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEXT:    vorr d0, d10, d10
-; LE-I32-NEXT:    vpop {d8, d9, d10}
-; LE-I32-NEXT:    pop {r11, pc}
-;
-; LE-I64-LABEL: lrint_v2f64:
-; LE-I64:       @ %bb.0:
-; LE-I64-NEXT:    .save {r4, lr}
-; LE-I64-NEXT:    push {r4, lr}
-; LE-I64-NEXT:    .vsave {d8, d9, d10, d11}
-; LE-I64-NEXT:    vpush {d8, d9, d10, d11}
-; LE-I64-NEXT:    vorr q4, q0, q0
-; LE-I64-NEXT:    vorr d0, d9, d9
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d8, d8
+; LE-I64-NEXT:    .save {r4, lr}
+; LE-I64-NEXT:    push {r4, lr}
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11}
+; LE-I64-NEXT:    vorr q4, q0, q0
+; LE-I64-NEXT:    vorr d0, d9, d9
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    vorr d0, d8, d8
 ; LE-I64-NEXT:    mov r4, r1
 ; LE-I64-NEXT:    vmov.32 d11[0], r0
 ; LE-I64-NEXT:    bl lrint
@@ -4120,42 +1082,6 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
 ; LE-I64-NEXT:    vpop {d8, d9, d10, d11}
 ; LE-I64-NEXT:    pop {r4, pc}
 ;
-; LE-I32-NEON-LABEL: lrint_v2f64:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r11, lr}
-; LE-I32-NEON-NEXT:    push {r11, lr}
-; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10}
-; LE-I32-NEON-NEXT:    vpush {d8, d9, d10}
-; LE-I32-NEON-NEXT:    vorr q4, q0, q0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d9, d9
-; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEON-NEXT:    vorr d0, d10, d10
-; LE-I32-NEON-NEXT:    vpop {d8, d9, d10}
-; LE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v2f64:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r4, lr}
-; LE-I64-NEON-NEXT:    push {r4, lr}
-; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11}
-; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11}
-; LE-I64-NEON-NEXT:    vorr q4, q0, q0
-; LE-I64-NEON-NEXT:    vorr d0, d9, d9
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d8, d8
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r1
-; LE-I64-NEON-NEXT:    vorr q0, q5, q5
-; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11}
-; LE-I64-NEON-NEXT:    pop {r4, pc}
-;
 ; BE-I32-LABEL: lrint_v2f64:
 ; BE-I32:       @ %bb.0:
 ; BE-I32-NEXT:    .save {r11, lr}
@@ -4191,42 +1117,6 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
 ; BE-I64-NEXT:    vrev64.32 q0, q5
 ; BE-I64-NEXT:    vpop {d8, d9, d10, d11}
 ; BE-I64-NEXT:    pop {r4, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v2f64:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r11, lr}
-; BE-I32-NEON-NEXT:    push {r11, lr}
-; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10}
-; BE-I32-NEON-NEXT:    vpush {d8, d9, d10}
-; BE-I32-NEON-NEXT:    vorr q4, q0, q0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d9, d9
-; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEON-NEXT:    vrev64.32 d0, d10
-; BE-I32-NEON-NEXT:    vpop {d8, d9, d10}
-; BE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v2f64:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r4, lr}
-; BE-I64-NEON-NEXT:    push {r4, lr}
-; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11}
-; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11}
-; BE-I64-NEON-NEXT:    vorr q4, q0, q0
-; BE-I64-NEON-NEXT:    vorr d0, d9, d9
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d8, d8
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q0, q5
-; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11}
-; BE-I64-NEON-NEXT:    pop {r4, pc}
   %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x)
   ret <2 x iXLen> %a
 }
@@ -4289,62 +1179,6 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
 ; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-I64-NEXT:    pop {r4, r5, r6, pc}
 ;
-; LE-I32-NEON-LABEL: lrint_v4f64:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r11, lr}
-; LE-I32-NEON-NEXT:    push {r11, lr}
-; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; LE-I32-NEON-NEXT:    vorr q4, q1, q1
-; LE-I32-NEON-NEXT:    vorr q5, q0, q0
-; LE-I32-NEON-NEXT:    vorr d0, d8, d8
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d10, d10
-; LE-I32-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d9, d9
-; LE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d11, d11
-; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
-; LE-I32-NEON-NEXT:    vorr q0, q6, q6
-; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
-; LE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v4f64:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r4, r5, r6, lr}
-; LE-I64-NEON-NEXT:    push {r4, r5, r6, lr}
-; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    vorr q5, q1, q1
-; LE-I64-NEON-NEXT:    vorr q6, q0, q0
-; LE-I64-NEON-NEXT:    vorr d0, d11, d11
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d12, d12
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d13, d13
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d10, d10
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r6
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r4
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r5
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-I64-NEON-NEXT:    vorr q0, q7, q7
-; LE-I64-NEON-NEXT:    vorr q1, q4, q4
-; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    pop {r4, r5, r6, pc}
-;
 ; BE-I32-LABEL: lrint_v4f64:
 ; BE-I32:       @ %bb.0:
 ; BE-I32-NEXT:    .save {r11, lr}
@@ -4400,62 +1234,6 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
 ; BE-I64-NEXT:    vrev64.32 q1, q6
 ; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-I64-NEXT:    pop {r4, r5, r6, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v4f64:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r11, lr}
-; BE-I32-NEON-NEXT:    push {r11, lr}
-; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; BE-I32-NEON-NEXT:    vorr q4, q1, q1
-; BE-I32-NEON-NEXT:    vorr q5, q0, q0
-; BE-I32-NEON-NEXT:    vorr d0, d8, d8
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d10, d10
-; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d9, d9
-; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d11, d11
-; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vmov.32 d12[1], r0
-; BE-I32-NEON-NEXT:    vrev64.32 q0, q6
-; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
-; BE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v4f64:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r4, r5, r6, lr}
-; BE-I64-NEON-NEXT:    push {r4, r5, r6, lr}
-; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    vorr q4, q1, q1
-; BE-I64-NEON-NEXT:    vorr q5, q0, q0
-; BE-I64-NEON-NEXT:    vorr d0, d9, d9
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d10, d10
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d11, d11
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d8, d8
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r6
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r5
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q0, q7
-; BE-I64-NEON-NEXT:    vrev64.32 q1, q6
-; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    pop {r4, r5, r6, pc}
   %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x)
   ret <4 x iXLen> %a
 }
@@ -4470,1842 +1248,342 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
 ; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-I32-NEXT:    .pad #32
 ; LE-I32-NEXT:    sub sp, sp, #32
-; LE-I32-NEXT:    vorr q5, q0, q0
-; LE-I32-NEXT:    add lr, sp, #16
-; LE-I32-NEXT:    vorr d0, d4, d4
-; LE-I32-NEXT:    vstmia sp, {d6, d7} @ 16-byte Spill
-; LE-I32-NEXT:    vorr q7, q3, q3
-; LE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; LE-I32-NEXT:    vorr q6, q1, q1
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d14, d14
-; LE-I32-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d12, d12
-; LE-I32-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d10, d10
-; LE-I32-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d13, d13
-; LE-I32-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d11, d11
-; LE-I32-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d17, d17
-; LE-I32-NEXT:    vmov.32 d14[1], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #16
-; LE-I32-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d17, d17
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEXT:    vorr q0, q7, q7
-; LE-I32-NEXT:    vorr q1, q4, q4
-; LE-I32-NEXT:    add sp, sp, #32
-; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEXT:    pop {r11, pc}
-;
-; LE-I64-LABEL: lrint_v8f64:
-; LE-I64:       @ %bb.0:
-; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEXT:    .pad #40
-; LE-I64-NEXT:    sub sp, sp, #40
-; LE-I64-NEXT:    vorr q4, q0, q0
-; LE-I64-NEXT:    add lr, sp, #24
-; LE-I64-NEXT:    vorr d0, d7, d7
-; LE-I64-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; LE-I64-NEXT:    vorr q7, q2, q2
-; LE-I64-NEXT:    vorr q6, q1, q1
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d14, d14
-; LE-I64-NEXT:    add lr, sp, #8
-; LE-I64-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEXT:    mov r8, r1
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d15, d15
-; LE-I64-NEXT:    mov r9, r1
-; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d12, d12
-; LE-I64-NEXT:    mov r10, r1
-; LE-I64-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d13, d13
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d8, d8
-; LE-I64-NEXT:    mov r4, r1
-; LE-I64-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d9, d9
-; LE-I64-NEXT:    mov r5, r1
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #24
-; LE-I64-NEXT:    mov r6, r1
-; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #8
-; LE-I64-NEXT:    vmov.32 d13[1], r6
-; LE-I64-NEXT:    vldmia lr, {d6, d7} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.32 d15[1], r4
-; LE-I64-NEXT:    vmov.32 d11[1], r10
-; LE-I64-NEXT:    vmov.32 d6[0], r0
-; LE-I64-NEXT:    vmov.32 d12[1], r5
-; LE-I64-NEXT:    vmov.32 d14[1], r7
-; LE-I64-NEXT:    vorr q0, q6, q6
-; LE-I64-NEXT:    vmov.32 d10[1], r9
-; LE-I64-NEXT:    vorr q1, q7, q7
-; LE-I64-NEXT:    vmov.32 d7[1], r8
-; LE-I64-NEXT:    vorr q2, q5, q5
-; LE-I64-NEXT:    vmov.32 d6[1], r1
-; LE-I64-NEXT:    add sp, sp, #40
-; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; LE-I32-NEON-LABEL: lrint_v8f64:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r11, lr}
-; LE-I32-NEON-NEXT:    push {r11, lr}
-; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    .pad #32
-; LE-I32-NEON-NEXT:    sub sp, sp, #32
-; LE-I32-NEON-NEXT:    vorr q5, q0, q0
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    vorr d0, d4, d4
-; LE-I32-NEON-NEXT:    vstmia sp, {d6, d7} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    vorr q7, q3, q3
-; LE-I32-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    vorr q6, q1, q1
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d14, d14
-; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d12, d12
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d10, d10
-; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d13, d13
-; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d11, d11
-; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d17, d17
-; LE-I32-NEON-NEXT:    vmov.32 d14[1], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d17, d17
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEON-NEXT:    vorr q0, q7, q7
-; LE-I32-NEON-NEXT:    vorr q1, q4, q4
-; LE-I32-NEON-NEXT:    add sp, sp, #32
-; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v8f64:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    .pad #40
-; LE-I64-NEON-NEXT:    sub sp, sp, #40
-; LE-I64-NEON-NEXT:    vorr q4, q0, q0
-; LE-I64-NEON-NEXT:    add lr, sp, #24
-; LE-I64-NEON-NEXT:    vorr d0, d7, d7
-; LE-I64-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    vorr q7, q2, q2
-; LE-I64-NEON-NEXT:    vorr q6, q1, q1
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d14, d14
-; LE-I64-NEON-NEXT:    add lr, sp, #8
-; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEON-NEXT:    mov r8, r1
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d15, d15
-; LE-I64-NEON-NEXT:    mov r9, r1
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d12, d12
-; LE-I64-NEON-NEXT:    mov r10, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d13, d13
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d8, d8
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d9, d9
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #24
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #8
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r6
-; LE-I64-NEON-NEXT:    vldmia lr, {d6, d7} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r4
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r10
-; LE-I64-NEON-NEXT:    vmov.32 d6[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r5
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r7
-; LE-I64-NEON-NEXT:    vorr q0, q6, q6
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r9
-; LE-I64-NEON-NEXT:    vorr q1, q7, q7
-; LE-I64-NEON-NEXT:    vmov.32 d7[1], r8
-; LE-I64-NEON-NEXT:    vorr q2, q5, q5
-; LE-I64-NEON-NEXT:    vmov.32 d6[1], r1
-; LE-I64-NEON-NEXT:    add sp, sp, #40
-; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; BE-I32-LABEL: lrint_v8f64:
-; BE-I32:       @ %bb.0:
-; BE-I32-NEXT:    .save {r11, lr}
-; BE-I32-NEXT:    push {r11, lr}
-; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEXT:    .pad #32
-; BE-I32-NEXT:    sub sp, sp, #32
-; BE-I32-NEXT:    vorr q5, q0, q0
-; BE-I32-NEXT:    vstmia sp, {d0, d1} @ 16-byte Spill
-; BE-I32-NEXT:    vorr d0, d4, d4
-; BE-I32-NEXT:    add lr, sp, #16
-; BE-I32-NEXT:    vorr q7, q3, q3
-; BE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-I32-NEXT:    vorr q6, q1, q1
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d10, d10
-; BE-I32-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d14, d14
-; BE-I32-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d12, d12
-; BE-I32-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d15, d15
-; BE-I32-NEXT:    vmov.32 d11[0], r0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d13, d13
-; BE-I32-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d17, d17
-; BE-I32-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #16
-; BE-I32-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d17, d17
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEXT:    vrev64.32 q0, q5
-; BE-I32-NEXT:    vrev64.32 q1, q4
-; BE-I32-NEXT:    add sp, sp, #32
-; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEXT:    pop {r11, pc}
-;
-; BE-I64-LABEL: lrint_v8f64:
-; BE-I64:       @ %bb.0:
-; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEXT:    .pad #40
-; BE-I64-NEXT:    sub sp, sp, #40
-; BE-I64-NEXT:    vorr q4, q0, q0
-; BE-I64-NEXT:    add lr, sp, #24
-; BE-I64-NEXT:    vorr d0, d7, d7
-; BE-I64-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; BE-I64-NEXT:    vorr q7, q2, q2
-; BE-I64-NEXT:    vorr q6, q1, q1
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d14, d14
-; BE-I64-NEXT:    add lr, sp, #8
-; BE-I64-NEXT:    vmov.32 d17[0], r0
-; BE-I64-NEXT:    mov r8, r1
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d15, d15
-; BE-I64-NEXT:    mov r9, r1
-; BE-I64-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d12, d12
-; BE-I64-NEXT:    mov r10, r1
-; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d13, d13
-; BE-I64-NEXT:    mov r7, r1
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d8, d8
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d9, d9
-; BE-I64-NEXT:    mov r5, r1
-; BE-I64-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    add lr, sp, #24
-; BE-I64-NEXT:    mov r6, r1
-; BE-I64-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    add lr, sp, #8
-; BE-I64-NEXT:    vmov.32 d13[1], r6
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    vmov.32 d15[1], r4
-; BE-I64-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEXT:    vmov.32 d11[1], r10
-; BE-I64-NEXT:    vmov.32 d17[1], r8
-; BE-I64-NEXT:    vmov.32 d12[1], r5
-; BE-I64-NEXT:    vmov.32 d14[1], r7
-; BE-I64-NEXT:    vmov.32 d10[1], r9
-; BE-I64-NEXT:    vmov.32 d16[1], r1
-; BE-I64-NEXT:    vrev64.32 q0, q6
-; BE-I64-NEXT:    vrev64.32 q1, q7
-; BE-I64-NEXT:    vrev64.32 q2, q5
-; BE-I64-NEXT:    vrev64.32 q3, q8
-; BE-I64-NEXT:    add sp, sp, #40
-; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v8f64:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r11, lr}
-; BE-I32-NEON-NEXT:    push {r11, lr}
-; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    .pad #32
-; BE-I32-NEON-NEXT:    sub sp, sp, #32
-; BE-I32-NEON-NEXT:    vorr q5, q0, q0
-; BE-I32-NEON-NEXT:    vstmia sp, {d0, d1} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    vorr d0, d4, d4
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    vorr q7, q3, q3
-; BE-I32-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    vorr q6, q1, q1
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d10, d10
-; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d14, d14
-; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d12, d12
-; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d15, d15
-; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d13, d13
-; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d17, d17
-; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d17, d17
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEON-NEXT:    vrev64.32 q0, q5
-; BE-I32-NEON-NEXT:    vrev64.32 q1, q4
-; BE-I32-NEON-NEXT:    add sp, sp, #32
-; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v8f64:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    .pad #40
-; BE-I64-NEON-NEXT:    sub sp, sp, #40
-; BE-I64-NEON-NEXT:    vorr q4, q0, q0
-; BE-I64-NEON-NEXT:    add lr, sp, #24
-; BE-I64-NEON-NEXT:    vorr d0, d7, d7
-; BE-I64-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    vorr q7, q2, q2
-; BE-I64-NEON-NEXT:    vorr q6, q1, q1
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d14, d14
-; BE-I64-NEON-NEXT:    add lr, sp, #8
-; BE-I64-NEON-NEXT:    vmov.32 d17[0], r0
-; BE-I64-NEON-NEXT:    mov r8, r1
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d15, d15
-; BE-I64-NEON-NEXT:    mov r9, r1
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d12, d12
-; BE-I64-NEON-NEXT:    mov r10, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d13, d13
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d8, d8
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d9, d9
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    add lr, sp, #24
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    add lr, sp, #8
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r6
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r10
-; BE-I64-NEON-NEXT:    vmov.32 d17[1], r8
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r5
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r7
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r9
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q0, q6
-; BE-I64-NEON-NEXT:    vrev64.32 q1, q7
-; BE-I64-NEON-NEXT:    vrev64.32 q2, q5
-; BE-I64-NEON-NEXT:    vrev64.32 q3, q8
-; BE-I64-NEON-NEXT:    add sp, sp, #40
-; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x)
-  ret <8 x iXLen> %a
-}
-declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
-
-define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
-; LE-I32-LABEL: lrint_v16f64:
-; LE-I32:       @ %bb.0:
-; LE-I32-NEXT:    .save {r4, r5, r6, lr}
-; LE-I32-NEXT:    push {r4, r5, r6, lr}
-; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEXT:    .pad #128
-; LE-I32-NEXT:    sub sp, sp, #128
-; LE-I32-NEXT:    add lr, sp, #80
-; LE-I32-NEXT:    add r0, sp, #240
-; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEXT:    add r0, sp, #208
-; LE-I32-NEXT:    vorr q6, q0, q0
-; LE-I32-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #32
-; LE-I32-NEXT:    vorr q5, q1, q1
-; LE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEXT:    vorr q5, q0, q0
 ; LE-I32-NEXT:    add lr, sp, #16
-; LE-I32-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #64
 ; LE-I32-NEXT:    vorr d0, d4, d4
-; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #112
-; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEXT:    add r0, sp, #224
-; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #96
-; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEXT:    add r0, sp, #256
-; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #48
-; LE-I32-NEXT:    vld1.64 {d14, d15}, [r0]
-; LE-I32-NEXT:    vstmia sp, {d2, d3} @ 16-byte Spill
-; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I32-NEXT:    vstmia sp, {d6, d7} @ 16-byte Spill
+; LE-I32-NEXT:    vorr q7, q3, q3
+; LE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEXT:    vorr q6, q1, q1
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d12, d12
+; LE-I32-NEXT:    vorr d0, d14, d14
 ; LE-I32-NEXT:    vmov.32 d8[0], r0
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d10, d10
-; LE-I32-NEXT:    vmov.32 d12[0], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d14, d14
-; LE-I32-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEXT:    vorr d0, d12, d12
+; LE-I32-NEXT:    vmov.32 d9[0], r0
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #64
-; LE-I32-NEXT:    mov r4, r0
-; LE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEXT:    vorr d0, d10, d10
+; LE-I32-NEXT:    vmov.32 d15[0], r0
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #80
+; LE-I32-NEXT:    vorr d0, d13, d13
 ; LE-I32-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #112
-; LE-I32-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I32-NEXT:    vmov.32 d15[0], r4
+; LE-I32-NEXT:    vorr d0, d11, d11
+; LE-I32-NEXT:    vmov.32 d15[1], r0
 ; LE-I32-NEXT:    bl lrint
 ; LE-I32-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
 ; LE-I32-NEXT:    vorr d0, d17, d17
-; LE-I32-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #96
-; LE-I32-NEXT:    vmov.32 d13[1], r0
-; LE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEXT:    vmov.32 d14[1], r0
 ; LE-I32-NEXT:    bl lrint
 ; LE-I32-NEXT:    add lr, sp, #16
-; LE-I32-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d17, d17
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #80
-; LE-I32-NEXT:    vmov.32 d12[1], r0
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d17, d17
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #32
 ; LE-I32-NEXT:    vmov.32 d9[1], r0
 ; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
 ; LE-I32-NEXT:    vorr d0, d17, d17
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #96
 ; LE-I32-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d17, d17
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #112
-; LE-I32-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d17, d17
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #48
-; LE-I32-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d17, d17
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #64
-; LE-I32-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d17, d17
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vmov.32 d14[1], r0
-; LE-I32-NEXT:    vorr q0, q6, q6
+; LE-I32-NEXT:    vorr q0, q7, q7
 ; LE-I32-NEXT:    vorr q1, q4, q4
-; LE-I32-NEXT:    vorr q2, q5, q5
-; LE-I32-NEXT:    vorr q3, q7, q7
-; LE-I32-NEXT:    add sp, sp, #128
+; LE-I32-NEXT:    add sp, sp, #32
 ; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEXT:    pop {r4, r5, r6, pc}
+; LE-I32-NEXT:    pop {r11, pc}
 ;
-; LE-I64-LABEL: lrint_v16f64:
+; LE-I64-LABEL: lrint_v8f64:
 ; LE-I64:       @ %bb.0:
-; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEXT:    .pad #4
-; LE-I64-NEXT:    sub sp, sp, #4
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
 ; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEXT:    .pad #176
-; LE-I64-NEXT:    sub sp, sp, #176
-; LE-I64-NEXT:    add lr, sp, #40
-; LE-I64-NEXT:    str r0, [sp, #140] @ 4-byte Spill
-; LE-I64-NEXT:    add r0, sp, #312
-; LE-I64-NEXT:    vorr q6, q2, q2
+; LE-I64-NEXT:    .pad #40
+; LE-I64-NEXT:    sub sp, sp, #40
+; LE-I64-NEXT:    vorr q4, q0, q0
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vorr d0, d7, d7
 ; LE-I64-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #96
-; LE-I64-NEXT:    vorr q7, q1, q1
-; LE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #144
-; LE-I64-NEXT:    vorr d0, d1, d1
-; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEXT:    add r0, sp, #280
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #80
-; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEXT:    add r0, sp, #296
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #120
-; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEXT:    add r0, sp, #328
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #56
-; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    vorr q7, q2, q2
+; LE-I64-NEXT:    vorr q6, q1, q1
 ; LE-I64-NEXT:    bl lrint
 ; LE-I64-NEXT:    vorr d0, d14, d14
-; LE-I64-NEXT:    str r1, [sp, #116] @ 4-byte Spill
-; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; LE-I64-NEXT:    bl lrint
 ; LE-I64-NEXT:    vorr d0, d15, d15
-; LE-I64-NEXT:    str r1, [sp, #76] @ 4-byte Spill
-; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d10[0], r0
 ; LE-I64-NEXT:    bl lrint
 ; LE-I64-NEXT:    vorr d0, d12, d12
-; LE-I64-NEXT:    add lr, sp, #160
-; LE-I64-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEXT:    str r1, [sp, #72] @ 4-byte Spill
-; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
 ; LE-I64-NEXT:    bl lrint
 ; LE-I64-NEXT:    vorr d0, d13, d13
-; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    mov r7, r1
 ; LE-I64-NEXT:    vmov.32 d14[0], r0
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vorr d0, d8, d8
 ; LE-I64-NEXT:    mov r4, r1
 ; LE-I64-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d8, d8
 ; LE-I64-NEXT:    bl lrint
 ; LE-I64-NEXT:    vorr d0, d9, d9
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #96
 ; LE-I64-NEXT:    mov r5, r1
-; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    add lr, sp, #40
-; LE-I64-NEXT:    mov r10, r1
-; LE-I64-NEXT:    vmov.32 d13[1], r5
-; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #56
-; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d9, d9
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d8, d8
-; LE-I64-NEXT:    vmov.32 d12[1], r7
-; LE-I64-NEXT:    add lr, sp, #96
-; LE-I64-NEXT:    mov r9, r1
-; LE-I64-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    vmov.32 d12[0], r0
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vmov.32 d10[0], r0
 ; LE-I64-NEXT:    add lr, sp, #24
-; LE-I64-NEXT:    mov r11, r1
-; LE-I64-NEXT:    vmov.32 d15[1], r4
-; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #144
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d17, d17
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #8
-; LE-I64-NEXT:    vmov.32 d14[1], r6
-; LE-I64-NEXT:    mov r8, r1
-; LE-I64-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #56
-; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #80
-; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d11, d11
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEXT:    add lr, sp, #160
-; LE-I64-NEXT:    vorr d0, d10, d10
-; LE-I64-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
-; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; LE-I64-NEXT:    mov r6, r1
-; LE-I64-NEXT:    vmov.32 d9[1], r0
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
-; LE-I64-NEXT:    add lr, sp, #160
-; LE-I64-NEXT:    mov r4, r1
-; LE-I64-NEXT:    vmov.32 d8[1], r0
-; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #120
-; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d11, d11
-; LE-I64-NEXT:    bl lrint
 ; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    add lr, sp, #40
-; LE-I64-NEXT:    vorr d0, d10, d10
-; LE-I64-NEXT:    ldr r0, [sp, #116] @ 4-byte Reload
-; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEXT:    mov r5, r1
-; LE-I64-NEXT:    vmov.32 d9[1], r0
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #144
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    vmov.32 d12[0], r0
 ; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
 ; LE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I64-NEXT:    vmov.32 d8[1], r10
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #8
-; LE-I64-NEXT:    vmov.32 d15[1], r6
-; LE-I64-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #24
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #160
-; LE-I64-NEXT:    vmov.32 d20[0], r0
-; LE-I64-NEXT:    vmov.32 d21[1], r8
-; LE-I64-NEXT:    vmov.32 d20[1], r1
-; LE-I64-NEXT:    ldr r1, [sp, #140] @ 4-byte Reload
-; LE-I64-NEXT:    vmov.32 d13[1], r5
-; LE-I64-NEXT:    mov r0, r1
-; LE-I64-NEXT:    vst1.64 {d8, d9}, [r0:128]!
-; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #56
-; LE-I64-NEXT:    vmov.32 d14[1], r4
-; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #96
-; LE-I64-NEXT:    vmov.32 d12[1], r7
-; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.32 d17[1], r9
-; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; LE-I64-NEXT:    add r0, r1, #64
-; LE-I64-NEXT:    vst1.64 {d14, d15}, [r0:128]!
-; LE-I64-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-I64-NEXT:    vmov.32 d16[1], r11
-; LE-I64-NEXT:    vst1.64 {d20, d21}, [r0:128]!
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-I64-NEXT:    add sp, sp, #176
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    vmov.32 d13[1], r6
+; LE-I64-NEXT:    vldmia lr, {d6, d7} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d15[1], r4
+; LE-I64-NEXT:    vmov.32 d11[1], r10
+; LE-I64-NEXT:    vmov.32 d6[0], r0
+; LE-I64-NEXT:    vmov.32 d12[1], r5
+; LE-I64-NEXT:    vmov.32 d14[1], r7
+; LE-I64-NEXT:    vorr q0, q6, q6
+; LE-I64-NEXT:    vmov.32 d10[1], r9
+; LE-I64-NEXT:    vorr q1, q7, q7
+; LE-I64-NEXT:    vmov.32 d7[1], r8
+; LE-I64-NEXT:    vorr q2, q5, q5
+; LE-I64-NEXT:    vmov.32 d6[1], r1
+; LE-I64-NEXT:    add sp, sp, #40
 ; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEXT:    add sp, sp, #4
-; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; LE-I32-NEON-LABEL: lrint_v16f64:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r4, r5, r6, lr}
-; LE-I32-NEON-NEXT:    push {r4, r5, r6, lr}
-; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    .pad #128
-; LE-I32-NEON-NEXT:    sub sp, sp, #128
-; LE-I32-NEON-NEXT:    add lr, sp, #80
-; LE-I32-NEON-NEXT:    add r0, sp, #240
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    add r0, sp, #208
-; LE-I32-NEON-NEXT:    vorr q6, q0, q0
-; LE-I32-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    vorr q5, q1, q1
-; LE-I32-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    vorr d0, d4, d4
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #112
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    add r0, sp, #224
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #96
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    add r0, sp, #256
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    vld1.64 {d14, d15}, [r0]
-; LE-I32-NEON-NEXT:    vstmia sp, {d2, d3} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d12, d12
-; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d10, d10
-; LE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d14, d14
-; LE-I32-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    mov r4, r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #80
-; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #112
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I32-NEON-NEXT:    vmov.32 d15[0], r4
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d17, d17
-; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #96
-; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d17, d17
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #80
-; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d17, d17
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d17, d17
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #96
-; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d17, d17
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #112
-; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d17, d17
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d17, d17
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d17, d17
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vmov.32 d14[1], r0
-; LE-I32-NEON-NEXT:    vorr q0, q6, q6
-; LE-I32-NEON-NEXT:    vorr q1, q4, q4
-; LE-I32-NEON-NEXT:    vorr q2, q5, q5
-; LE-I32-NEON-NEXT:    vorr q3, q7, q7
-; LE-I32-NEON-NEXT:    add sp, sp, #128
-; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    pop {r4, r5, r6, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v16f64:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEON-NEXT:    .pad #4
-; LE-I64-NEON-NEXT:    sub sp, sp, #4
-; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    .pad #176
-; LE-I64-NEON-NEXT:    sub sp, sp, #176
-; LE-I64-NEON-NEXT:    add lr, sp, #40
-; LE-I64-NEON-NEXT:    str r0, [sp, #140] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    add r0, sp, #312
-; LE-I64-NEON-NEXT:    vorr q6, q2, q2
-; LE-I64-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #96
-; LE-I64-NEON-NEXT:    vorr q7, q1, q1
-; LE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #144
-; LE-I64-NEON-NEXT:    vorr d0, d1, d1
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEON-NEXT:    add r0, sp, #280
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #80
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEON-NEXT:    add r0, sp, #296
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #120
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEON-NEXT:    add r0, sp, #328
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #56
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d14, d14
-; LE-I64-NEON-NEXT:    str r1, [sp, #116] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d15, d15
-; LE-I64-NEON-NEXT:    str r1, [sp, #76] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d12, d12
-; LE-I64-NEON-NEXT:    add lr, sp, #160
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    str r1, [sp, #72] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d13, d13
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #40
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d8, d8
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d9, d9
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #96
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #40
-; LE-I64-NEON-NEXT:    mov r10, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r5
-; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #56
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d9, d9
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d8, d8
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r7
-; LE-I64-NEON-NEXT:    add lr, sp, #96
-; LE-I64-NEON-NEXT:    mov r9, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #24
-; LE-I64-NEON-NEXT:    mov r11, r1
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r4
-; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #144
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d17, d17
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #8
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r6
-; LE-I64-NEON-NEXT:    mov r8, r1
-; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #56
-; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #80
-; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d11, d11
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #160
-; LE-I64-NEON-NEXT:    vorr d0, d10, d10
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #160
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #120
-; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d11, d11
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #40
-; LE-I64-NEON-NEXT:    vorr d0, d10, d10
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #116] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #144
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r10
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #8
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r6
-; LE-I64-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #24
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #160
-; LE-I64-NEON-NEXT:    vmov.32 d20[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d21[1], r8
-; LE-I64-NEON-NEXT:    vmov.32 d20[1], r1
-; LE-I64-NEON-NEXT:    ldr r1, [sp, #140] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r5
-; LE-I64-NEON-NEXT:    mov r0, r1
-; LE-I64-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #56
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r4
-; LE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #96
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r7
-; LE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d17[1], r9
-; LE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; LE-I64-NEON-NEXT:    add r0, r1, #64
-; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-I64-NEON-NEXT:    vmov.32 d16[1], r11
-; LE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r0:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-I64-NEON-NEXT:    add sp, sp, #176
-; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    add sp, sp, #4
-; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 ;
-; BE-I32-LABEL: lrint_v16f64:
+; BE-I32-LABEL: lrint_v8f64:
 ; BE-I32:       @ %bb.0:
-; BE-I32-NEXT:    .save {r4, r5, r6, lr}
-; BE-I32-NEXT:    push {r4, r5, r6, lr}
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
 ; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEXT:    .pad #128
-; BE-I32-NEXT:    sub sp, sp, #128
-; BE-I32-NEXT:    add lr, sp, #64
-; BE-I32-NEXT:    add r0, sp, #240
-; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEXT:    add r0, sp, #224
-; BE-I32-NEXT:    vorr q6, q3, q3
-; BE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I32-NEXT:    .pad #32
+; BE-I32-NEXT:    sub sp, sp, #32
+; BE-I32-NEXT:    vorr q5, q0, q0
+; BE-I32-NEXT:    vstmia sp, {d0, d1} @ 16-byte Spill
+; BE-I32-NEXT:    vorr d0, d4, d4
 ; BE-I32-NEXT:    add lr, sp, #16
-; BE-I32-NEXT:    vorr q5, q1, q1
-; BE-I32-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #32
-; BE-I32-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #80
-; BE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #112
-; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEXT:    add r0, sp, #256
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #96
-; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEXT:    add r0, sp, #208
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #48
-; BE-I32-NEXT:    vld1.64 {d14, d15}, [r0]
-; BE-I32-NEXT:    vstmia sp, {d6, d7} @ 16-byte Spill
-; BE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT:    vorr q7, q3, q3
+; BE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; BE-I32-NEXT:    vorr q6, q1, q1
 ; BE-I32-NEXT:    bl lrint
 ; BE-I32-NEXT:    vorr d0, d10, d10
 ; BE-I32-NEXT:    vmov.32 d8[0], r0
 ; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vorr d0, d14, d14
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    bl lrint
 ; BE-I32-NEXT:    vorr d0, d12, d12
 ; BE-I32-NEXT:    vmov.32 d9[0], r0
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d14, d14
+; BE-I32-NEXT:    vorr d0, d15, d15
 ; BE-I32-NEXT:    vmov.32 d11[0], r0
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #80
-; BE-I32-NEXT:    mov r4, r0
-; BE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #64
-; BE-I32-NEXT:    vmov.32 d12[0], r0
-; BE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #112
-; BE-I32-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I32-NEXT:    vmov.32 d14[0], r4
+; BE-I32-NEXT:    vorr d0, d13, d13
+; BE-I32-NEXT:    vmov.32 d9[1], r0
 ; BE-I32-NEXT:    bl lrint
 ; BE-I32-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
 ; BE-I32-NEXT:    vorr d0, d17, d17
-; BE-I32-NEXT:    vmov.32 d15[0], r0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #96
 ; BE-I32-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #64
-; BE-I32-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d17, d17
 ; BE-I32-NEXT:    bl lrint
 ; BE-I32-NEXT:    add lr, sp, #16
 ; BE-I32-NEXT:    vmov.32 d10[1], r0
 ; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
 ; BE-I32-NEXT:    vorr d0, d17, d17
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #32
-; BE-I32-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d17, d17
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #96
 ; BE-I32-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d17, d17
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #112
-; BE-I32-NEXT:    vmov.32 d13[1], r0
-; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d17, d17
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #48
-; BE-I32-NEXT:    vmov.32 d15[1], r0
-; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d17, d17
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #80
-; BE-I32-NEXT:    vmov.32 d14[1], r0
-; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d17, d17
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vmov.32 d12[1], r0
-; BE-I32-NEXT:    vrev64.32 q0, q4
-; BE-I32-NEXT:    vrev64.32 q1, q5
-; BE-I32-NEXT:    vrev64.32 q2, q7
-; BE-I32-NEXT:    vrev64.32 q3, q6
-; BE-I32-NEXT:    add sp, sp, #128
+; BE-I32-NEXT:    vrev64.32 q0, q5
+; BE-I32-NEXT:    vrev64.32 q1, q4
+; BE-I32-NEXT:    add sp, sp, #32
 ; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEXT:    pop {r4, r5, r6, pc}
+; BE-I32-NEXT:    pop {r11, pc}
 ;
-; BE-I64-LABEL: lrint_v16f64:
+; BE-I64-LABEL: lrint_v8f64:
 ; BE-I64:       @ %bb.0:
-; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEXT:    .pad #4
-; BE-I64-NEXT:    sub sp, sp, #4
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
 ; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEXT:    .pad #168
-; BE-I64-NEXT:    sub sp, sp, #168
-; BE-I64-NEXT:    add lr, sp, #64
-; BE-I64-NEXT:    str r0, [sp, #132] @ 4-byte Spill
-; BE-I64-NEXT:    add r0, sp, #304
-; BE-I64-NEXT:    vorr q4, q3, q3
-; BE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #48
-; BE-I64-NEXT:    vorr d0, d1, d1
-; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEXT:    add r0, sp, #320
-; BE-I64-NEXT:    vorr q6, q2, q2
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #88
-; BE-I64-NEXT:    vorr q7, q1, q1
-; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEXT:    add r0, sp, #272
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #112
-; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEXT:    add r0, sp, #288
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    .pad #40
+; BE-I64-NEXT:    sub sp, sp, #40
+; BE-I64-NEXT:    vorr q4, q0, q0
 ; BE-I64-NEXT:    add lr, sp, #24
-; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    vorr d0, d7, d7
+; BE-I64-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; BE-I64-NEXT:    vorr q7, q2, q2
+; BE-I64-NEXT:    vorr q6, q1, q1
 ; BE-I64-NEXT:    bl lrint
 ; BE-I64-NEXT:    vorr d0, d14, d14
-; BE-I64-NEXT:    add lr, sp, #136
+; BE-I64-NEXT:    add lr, sp, #8
 ; BE-I64-NEXT:    vmov.32 d17[0], r0
-; BE-I64-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; BE-I64-NEXT:    mov r8, r1
 ; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-I64-NEXT:    bl lrint
 ; BE-I64-NEXT:    vorr d0, d15, d15
-; BE-I64-NEXT:    str r1, [sp, #84] @ 4-byte Spill
+; BE-I64-NEXT:    mov r9, r1
 ; BE-I64-NEXT:    vmov.32 d10[0], r0
 ; BE-I64-NEXT:    bl lrint
 ; BE-I64-NEXT:    vorr d0, d12, d12
-; BE-I64-NEXT:    add lr, sp, #152
+; BE-I64-NEXT:    mov r10, r1
 ; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    str r1, [sp, #44] @ 4-byte Spill
-; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
 ; BE-I64-NEXT:    bl lrint
 ; BE-I64-NEXT:    vorr d0, d13, d13
-; BE-I64-NEXT:    mov r6, r1
-; BE-I64-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d8, d8
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d9, d9
 ; BE-I64-NEXT:    mov r7, r1
-; BE-I64-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    add lr, sp, #64
-; BE-I64-NEXT:    mov r5, r1
-; BE-I64-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    add lr, sp, #136
-; BE-I64-NEXT:    mov r9, r1
-; BE-I64-NEXT:    vmov.32 d13[1], r5
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #24
-; BE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I64-NEXT:    vorr d0, d9, d9
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d8, d8
-; BE-I64-NEXT:    vmov.32 d12[1], r7
-; BE-I64-NEXT:    add lr, sp, #64
-; BE-I64-NEXT:    mov r10, r1
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I64-NEXT:    bl lrint
 ; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    add lr, sp, #8
-; BE-I64-NEXT:    mov r11, r1
-; BE-I64-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #48
-; BE-I64-NEXT:    vorr q6, q5, q5
-; BE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I64-NEXT:    vorr d0, d9, d9
 ; BE-I64-NEXT:    bl lrint
 ; BE-I64-NEXT:    vorr d0, d8, d8
-; BE-I64-NEXT:    vmov.32 d12[1], r6
-; BE-I64-NEXT:    add lr, sp, #24
-; BE-I64-NEXT:    mov r8, r1
-; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEXT:    add lr, sp, #48
-; BE-I64-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
-; BE-I64-NEXT:    mov r6, r1
-; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #152
-; BE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #88
-; BE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I64-NEXT:    vorr d0, d13, d13
-; BE-I64-NEXT:    vmov.32 d9[1], r0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
-; BE-I64-NEXT:    vorr d0, d12, d12
-; BE-I64-NEXT:    add lr, sp, #152
 ; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d8[1], r0
-; BE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I64-NEXT:    vmov.32 d15[0], r0
 ; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    add lr, sp, #136
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
-; BE-I64-NEXT:    mov r5, r1
-; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #112
-; BE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; BE-I64-NEXT:    vorr d0, d9, d9
-; BE-I64-NEXT:    vmov.32 d11[1], r0
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d12[0], r0
 ; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d8, d8
-; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    mov r6, r1
 ; BE-I64-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEXT:    vmov.32 d10[1], r9
+; BE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
 ; BE-I64-NEXT:    bl lrint
 ; BE-I64-NEXT:    add lr, sp, #8
-; BE-I64-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #48
-; BE-I64-NEXT:    vmov.32 d17[1], r10
-; BE-I64-NEXT:    vmov.32 d16[1], r11
-; BE-I64-NEXT:    vorr q12, q8, q8
+; BE-I64-NEXT:    vmov.32 d13[1], r6
 ; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #152
-; BE-I64-NEXT:    vmov.32 d17[1], r8
-; BE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #24
-; BE-I64-NEXT:    vmov.32 d13[1], r7
-; BE-I64-NEXT:    vmov.32 d16[1], r6
-; BE-I64-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #64
-; BE-I64-NEXT:    vorr q13, q8, q8
-; BE-I64-NEXT:    vmov.32 d12[1], r1
-; BE-I64-NEXT:    ldr r1, [sp, #132] @ 4-byte Reload
-; BE-I64-NEXT:    vrev64.32 q8, q5
-; BE-I64-NEXT:    mov r0, r1
-; BE-I64-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
-; BE-I64-NEXT:    vrev64.32 q9, q9
-; BE-I64-NEXT:    vrev64.32 q10, q10
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; BE-I64-NEXT:    vrev64.32 q11, q11
 ; BE-I64-NEXT:    vmov.32 d15[1], r4
-; BE-I64-NEXT:    vst1.64 {d20, d21}, [r0:128]!
-; BE-I64-NEXT:    vrev64.32 q15, q6
-; BE-I64-NEXT:    vmov.32 d14[1], r5
-; BE-I64-NEXT:    vrev64.32 q12, q12
-; BE-I64-NEXT:    vst1.64 {d22, d23}, [r0:128]
-; BE-I64-NEXT:    add r0, r1, #64
-; BE-I64-NEXT:    vrev64.32 q13, q13
-; BE-I64-NEXT:    vst1.64 {d30, d31}, [r0:128]!
-; BE-I64-NEXT:    vst1.64 {d24, d25}, [r0:128]!
-; BE-I64-NEXT:    vrev64.32 q14, q7
-; BE-I64-NEXT:    vst1.64 {d26, d27}, [r0:128]!
-; BE-I64-NEXT:    vst1.64 {d28, d29}, [r0:128]
-; BE-I64-NEXT:    add sp, sp, #168
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d11[1], r10
+; BE-I64-NEXT:    vmov.32 d17[1], r8
+; BE-I64-NEXT:    vmov.32 d12[1], r5
+; BE-I64-NEXT:    vmov.32 d14[1], r7
+; BE-I64-NEXT:    vmov.32 d10[1], r9
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 q0, q6
+; BE-I64-NEXT:    vrev64.32 q1, q7
+; BE-I64-NEXT:    vrev64.32 q2, q5
+; BE-I64-NEXT:    vrev64.32 q3, q8
+; BE-I64-NEXT:    add sp, sp, #40
 ; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEXT:    add sp, sp, #4
-; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v16f64:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r4, r5, r6, lr}
-; BE-I32-NEON-NEXT:    push {r4, r5, r6, lr}
-; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    .pad #128
-; BE-I32-NEON-NEXT:    sub sp, sp, #128
-; BE-I32-NEON-NEXT:    add lr, sp, #64
-; BE-I32-NEON-NEXT:    add r0, sp, #240
-; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEON-NEXT:    add r0, sp, #224
-; BE-I32-NEON-NEXT:    vorr q6, q3, q3
-; BE-I32-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    vorr q5, q1, q1
-; BE-I32-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #32
-; BE-I32-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #80
-; BE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #112
-; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEON-NEXT:    add r0, sp, #256
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #96
-; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEON-NEXT:    add r0, sp, #208
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #48
-; BE-I32-NEON-NEXT:    vld1.64 {d14, d15}, [r0]
-; BE-I32-NEON-NEXT:    vstmia sp, {d6, d7} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d10, d10
-; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d12, d12
-; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d14, d14
-; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #80
-; BE-I32-NEON-NEXT:    mov r4, r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #64
-; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #112
-; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I32-NEON-NEXT:    vmov.32 d14[0], r4
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d17, d17
-; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #96
-; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #64
-; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d17, d17
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d17, d17
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #32
-; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d17, d17
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #96
-; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d17, d17
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #112
-; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d17, d17
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #48
-; BE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d17, d17
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #80
-; BE-I32-NEON-NEXT:    vmov.32 d14[1], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d17, d17
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vmov.32 d12[1], r0
-; BE-I32-NEON-NEXT:    vrev64.32 q0, q4
-; BE-I32-NEON-NEXT:    vrev64.32 q1, q5
-; BE-I32-NEON-NEXT:    vrev64.32 q2, q7
-; BE-I32-NEON-NEXT:    vrev64.32 q3, q6
-; BE-I32-NEON-NEXT:    add sp, sp, #128
-; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    pop {r4, r5, r6, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v16f64:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEON-NEXT:    .pad #4
-; BE-I64-NEON-NEXT:    sub sp, sp, #4
-; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    .pad #168
-; BE-I64-NEON-NEXT:    sub sp, sp, #168
-; BE-I64-NEON-NEXT:    add lr, sp, #64
-; BE-I64-NEON-NEXT:    str r0, [sp, #132] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    add r0, sp, #304
-; BE-I64-NEON-NEXT:    vorr q4, q3, q3
-; BE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #48
-; BE-I64-NEON-NEXT:    vorr d0, d1, d1
-; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEON-NEXT:    add r0, sp, #320
-; BE-I64-NEON-NEXT:    vorr q6, q2, q2
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #88
-; BE-I64-NEON-NEXT:    vorr q7, q1, q1
-; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEON-NEXT:    add r0, sp, #272
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #112
-; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEON-NEXT:    add r0, sp, #288
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #24
-; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d14, d14
-; BE-I64-NEON-NEXT:    add lr, sp, #136
-; BE-I64-NEON-NEXT:    vmov.32 d17[0], r0
-; BE-I64-NEON-NEXT:    str r1, [sp, #108] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d15, d15
-; BE-I64-NEON-NEXT:    str r1, [sp, #84] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d12, d12
-; BE-I64-NEON-NEXT:    add lr, sp, #152
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d13, d13
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d8, d8
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d9, d9
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    add lr, sp, #64
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    add lr, sp, #136
-; BE-I64-NEON-NEXT:    mov r9, r1
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r5
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #24
-; BE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d9, d9
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d8, d8
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r7
-; BE-I64-NEON-NEXT:    add lr, sp, #64
-; BE-I64-NEON-NEXT:    mov r10, r1
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #8
-; BE-I64-NEON-NEXT:    mov r11, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #48
-; BE-I64-NEON-NEXT:    vorr q6, q5, q5
-; BE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d9, d9
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d8, d8
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r6
-; BE-I64-NEON-NEXT:    add lr, sp, #24
-; BE-I64-NEON-NEXT:    mov r8, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #48
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #152
-; BE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #88
-; BE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d13, d13
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d12, d12
-; BE-I64-NEON-NEXT:    add lr, sp, #152
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    add lr, sp, #136
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #112
-; BE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d9, d9
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d8, d8
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r9
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    add lr, sp, #8
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #48
-; BE-I64-NEON-NEXT:    vmov.32 d17[1], r10
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r11
-; BE-I64-NEON-NEXT:    vorr q12, q8, q8
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #152
-; BE-I64-NEON-NEXT:    vmov.32 d17[1], r8
-; BE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #24
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r7
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r6
-; BE-I64-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #64
-; BE-I64-NEON-NEXT:    vorr q13, q8, q8
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r1
-; BE-I64-NEON-NEXT:    ldr r1, [sp, #132] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q5
-; BE-I64-NEON-NEXT:    mov r0, r1
-; BE-I64-NEON-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 q9, q9
-; BE-I64-NEON-NEXT:    vrev64.32 q10, q10
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; BE-I64-NEON-NEXT:    vrev64.32 q11, q11
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r4
-; BE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r0:128]!
-; BE-I64-NEON-NEXT:    vrev64.32 q15, q6
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r5
-; BE-I64-NEON-NEXT:    vrev64.32 q12, q12
-; BE-I64-NEON-NEXT:    vst1.64 {d22, d23}, [r0:128]
-; BE-I64-NEON-NEXT:    add r0, r1, #64
-; BE-I64-NEON-NEXT:    vrev64.32 q13, q13
-; BE-I64-NEON-NEXT:    vst1.64 {d30, d31}, [r0:128]!
-; BE-I64-NEON-NEXT:    vst1.64 {d24, d25}, [r0:128]!
-; BE-I64-NEON-NEXT:    vrev64.32 q14, q7
-; BE-I64-NEON-NEXT:    vst1.64 {d26, d27}, [r0:128]!
-; BE-I64-NEON-NEXT:    vst1.64 {d28, d29}, [r0:128]
-; BE-I64-NEON-NEXT:    add sp, sp, #168
-; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    add sp, sp, #4
-; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-  %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double> %x)
-  ret <16 x iXLen> %a
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x)
+  ret <8 x iXLen> %a
 }
-declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>)
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
 
-define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
-; LE-I32-LABEL: lrint_v32f64:
+define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
+; LE-I32-LABEL: lrint_v16f64:
 ; LE-I32:       @ %bb.0:
 ; LE-I32-NEXT:    .save {r4, r5, r6, lr}
 ; LE-I32-NEXT:    push {r4, r5, r6, lr}
 ; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEXT:    .pad #160
-; LE-I32-NEXT:    sub sp, sp, #160
-; LE-I32-NEXT:    add lr, sp, #96
-; LE-I32-NEXT:    mov r4, r0
-; LE-I32-NEXT:    add r0, sp, #304
-; LE-I32-NEXT:    vorr q6, q3, q3
-; LE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #64
+; LE-I32-NEXT:    .pad #128
+; LE-I32-NEXT:    sub sp, sp, #128
+; LE-I32-NEXT:    add lr, sp, #80
+; LE-I32-NEXT:    add r0, sp, #240
+; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; LE-I32-NEXT:    add r0, sp, #208
+; LE-I32-NEXT:    vorr q6, q0, q0
+; LE-I32-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #32
 ; LE-I32-NEXT:    vorr q5, q1, q1
+; LE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #16
 ; LE-I32-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    add lr, sp, #64
 ; LE-I32-NEXT:    vorr d0, d4, d4
-; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEXT:    add r0, sp, #352
-; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #16
-; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEXT:    add r0, sp, #272
 ; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; LE-I32-NEXT:    add lr, sp, #112
 ; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEXT:    add r0, sp, #288
-; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #80
-; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEXT:    add r0, sp, #336
+; LE-I32-NEXT:    add r0, sp, #224
 ; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #144
+; LE-I32-NEXT:    add lr, sp, #96
 ; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
 ; LE-I32-NEXT:    add r0, sp, #256
 ; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #128
-; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEXT:    add r0, sp, #320
-; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #32
-; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #48
+; LE-I32-NEXT:    vld1.64 {d14, d15}, [r0]
+; LE-I32-NEXT:    vstmia sp, {d2, d3} @ 16-byte Spill
+; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
 ; LE-I32-NEXT:    bl lrint
 ; LE-I32-NEXT:    vorr d0, d12, d12
-; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    vmov.32 d8[0], r0
 ; LE-I32-NEXT:    bl lrint
 ; LE-I32-NEXT:    vorr d0, d10, d10
-; LE-I32-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d13, d13
-; LE-I32-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d11, d11
-; LE-I32-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #96
-; LE-I32-NEXT:    vorr q5, q4, q4
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d17, d17
-; LE-I32-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vmov.32 d14[1], r0
-; LE-I32-NEXT:    add lr, sp, #96
-; LE-I32-NEXT:    add r0, sp, #416
-; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #64
-; LE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d8, d8
-; LE-I32-NEXT:    vstmia sp, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    vmov.32 d12[0], r0
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #32
-; LE-I32-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-I32-NEXT:    vorr q6, q5, q5
 ; LE-I32-NEXT:    vorr d0, d14, d14
+; LE-I32-NEXT:    vmov.32 d13[0], r0
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d9, d9
-; LE-I32-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d15, d15
-; LE-I32-NEXT:    vmov.32 d12[1], r0
 ; LE-I32-NEXT:    add lr, sp, #64
-; LE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEXT:    add r0, sp, #400
-; LE-I32-NEXT:    add lr, sp, #32
-; LE-I32-NEXT:    vorr q6, q5, q5
-; LE-I32-NEXT:    vldmia sp, {d10, d11} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d10, d10
-; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #48
-; LE-I32-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d8, d8
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d11, d11
-; LE-I32-NEXT:    vmov.32 d12[0], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d9, d9
-; LE-I32-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vmov.32 d12[1], r0
-; LE-I32-NEXT:    add lr, sp, #48
-; LE-I32-NEXT:    add r0, sp, #384
-; LE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I32-NEXT:    add lr, sp, #32
-; LE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d10, d10
-; LE-I32-NEXT:    vstmia sp, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #16
+; LE-I32-NEXT:    add lr, sp, #80
 ; LE-I32-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d8, d8
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d11, d11
-; LE-I32-NEXT:    vmov.32 d13[0], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d9, d9
-; LE-I32-NEXT:    vmov.32 d14[1], r0
-; LE-I32-NEXT:    add lr, sp, #32
-; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr q7, q6, q6
-; LE-I32-NEXT:    vldmia sp, {d10, d11} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d10, d10
-; LE-I32-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEXT:    add r0, sp, #368
-; LE-I32-NEXT:    vld1.64 {d12, d13}, [r0]
+; LE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #144
+; LE-I32-NEXT:    add lr, sp, #112
 ; LE-I32-NEXT:    vmov.32 d9[0], r0
 ; LE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
 ; LE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I32-NEXT:    vmov.32 d15[0], r4
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d11, d11
-; LE-I32-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d12, d12
-; LE-I32-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #144
-; LE-I32-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
 ; LE-I32-NEXT:    vorr d0, d17, d17
+; LE-I32-NEXT:    vmov.32 d10[0], r0
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vmov.32 d14[1], r0
-; LE-I32-NEXT:    add r0, sp, #240
-; LE-I32-NEXT:    vorr d0, d13, d13
-; LE-I32-NEXT:    add lr, sp, #144
-; LE-I32-NEXT:    vld1.64 {d10, d11}, [r0]
-; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I32-NEXT:    vstmia sp, {d10, d11} @ 16-byte Spill
+; LE-I32-NEXT:    add lr, sp, #96
+; LE-I32-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d10, d10
-; LE-I32-NEXT:    vmov.32 d8[1], r0
 ; LE-I32-NEXT:    add lr, sp, #16
-; LE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
 ; LE-I32-NEXT:    bl lrint
 ; LE-I32-NEXT:    add lr, sp, #80
-; LE-I32-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d12, d12
+; LE-I32-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #112
-; LE-I32-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d14, d14
+; LE-I32-NEXT:    add lr, sp, #32
+; LE-I32-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d13, d13
-; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    add lr, sp, #96
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    add lr, sp, #128
+; LE-I32-NEXT:    add lr, sp, #112
 ; LE-I32-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I32-NEXT:    vorr d0, d12, d12
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d15, d15
-; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vorr d0, d17, d17
 ; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vorr d0, d13, d13
+; LE-I32-NEXT:    add lr, sp, #48
 ; LE-I32-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEXT:    bl lrint
-; LE-I32-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
+; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
 ; LE-I32-NEXT:    vorr d0, d17, d17
-; LE-I32-NEXT:    vmov.32 d9[1], r0
 ; LE-I32-NEXT:    bl lrint
 ; LE-I32-NEXT:    add lr, sp, #64
-; LE-I32-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEXT:    mov r0, r4
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    add lr, sp, #96
-; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    add lr, sp, #48
-; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEXT:    vst1.32 {d8, d9}, [r0:128]!
-; LE-I32-NEXT:    vst1.64 {d10, d11}, [r0:128]
-; LE-I32-NEXT:    add r0, r4, #64
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    add lr, sp, #144
-; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    add lr, sp, #16
-; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    add lr, sp, #32
-; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
+; LE-I32-NEXT:    vmov.32 d15[1], r0
 ; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-I32-NEXT:    add sp, sp, #160
+; LE-I32-NEXT:    vorr d0, d17, d17
+; LE-I32-NEXT:    bl lrint
+; LE-I32-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEXT:    vorr q0, q6, q6
+; LE-I32-NEXT:    vorr q1, q4, q4
+; LE-I32-NEXT:    vorr q2, q5, q5
+; LE-I32-NEXT:    vorr q3, q7, q7
+; LE-I32-NEXT:    add sp, sp, #128
 ; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-I32-NEXT:    pop {r4, r5, r6, pc}
 ;
-; LE-I64-LABEL: lrint_v32f64:
+; LE-I64-LABEL: lrint_v16f64:
 ; LE-I64:       @ %bb.0:
 ; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -6313,1139 +1591,285 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
 ; LE-I64-NEXT:    sub sp, sp, #4
 ; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEXT:    .pad #208
-; LE-I64-NEXT:    sub sp, sp, #208
-; LE-I64-NEXT:    add lr, sp, #72
-; LE-I64-NEXT:    str r0, [sp, #156] @ 4-byte Spill
-; LE-I64-NEXT:    add r0, sp, #456
-; LE-I64-NEXT:    vorr q4, q0, q0
+; LE-I64-NEXT:    .pad #176
+; LE-I64-NEXT:    sub sp, sp, #176
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    str r0, [sp, #140] @ 4-byte Spill
+; LE-I64-NEXT:    add r0, sp, #312
+; LE-I64-NEXT:    vorr q6, q2, q2
 ; LE-I64-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #56
-; LE-I64-NEXT:    vorr d0, d7, d7
-; LE-I64-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #104
-; LE-I64-NEXT:    vorr q5, q2, q2
-; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEXT:    add r0, sp, #344
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #192
+; LE-I64-NEXT:    add lr, sp, #96
+; LE-I64-NEXT:    vorr q7, q1, q1
+; LE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #144
+; LE-I64-NEXT:    vorr d0, d1, d1
 ; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEXT:    add r0, sp, #376
+; LE-I64-NEXT:    add r0, sp, #280
 ; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    add lr, sp, #80
 ; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEXT:    add r0, sp, #360
+; LE-I64-NEXT:    add r0, sp, #296
 ; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #136
+; LE-I64-NEXT:    add lr, sp, #120
 ; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEXT:    add r0, sp, #440
+; LE-I64-NEXT:    add r0, sp, #328
 ; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #176
+; LE-I64-NEXT:    add lr, sp, #56
 ; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
 ; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d10, d10
-; LE-I64-NEXT:    str r1, [sp, #120] @ 4-byte Spill
-; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d11, d11
-; LE-I64-NEXT:    mov r10, r1
-; LE-I64-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d8, d8
-; LE-I64-NEXT:    add lr, sp, #88
-; LE-I64-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEXT:    mov r11, r1
-; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d9, d9
-; LE-I64-NEXT:    mov r9, r1
-; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vorr d0, d14, d14
+; LE-I64-NEXT:    str r1, [sp, #116] @ 4-byte Spill
 ; LE-I64-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEXT:    mov r6, r1
-; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #56
-; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d10, d10
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d11, d11
-; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vorr d0, d15, d15
+; LE-I64-NEXT:    str r1, [sp, #76] @ 4-byte Spill
 ; LE-I64-NEXT:    vmov.32 d8[0], r0
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #72
-; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vorr d0, d12, d12
+; LE-I64-NEXT:    add lr, sp, #160
 ; LE-I64-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    add lr, sp, #56
-; LE-I64-NEXT:    mov r5, r1
-; LE-I64-NEXT:    vmov.32 d9[1], r7
-; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #176
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d17, d17
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vmov.32 d8[1], r4
-; LE-I64-NEXT:    add lr, sp, #72
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    str r1, [sp, #72] @ 4-byte Spill
 ; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #104
-; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d9, d9
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #40
-; LE-I64-NEXT:    vorr d0, d8, d8
-; LE-I64-NEXT:    mov r8, r1
-; LE-I64-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.32 d11[1], r6
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #176
-; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vorr d0, d13, d13
+; LE-I64-NEXT:    mov r6, r1
 ; LE-I64-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I64-NEXT:    vmov.32 d10[1], r9
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #88
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #120] @ 4-byte Reload
-; LE-I64-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #56
-; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #24
-; LE-I64-NEXT:    vmov.32 d19[1], r0
-; LE-I64-NEXT:    add r0, sp, #408
-; LE-I64-NEXT:    ldr r2, [sp, #156] @ 4-byte Reload
-; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEXT:    vmov.32 d13[1], r7
-; LE-I64-NEXT:    mov r0, r2
-; LE-I64-NEXT:    vmov.32 d12[1], r1
-; LE-I64-NEXT:    add r1, sp, #488
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; LE-I64-NEXT:    add lr, sp, #40
-; LE-I64-NEXT:    vld1.64 {d16, d17}, [r1]
-; LE-I64-NEXT:    add r1, sp, #472
-; LE-I64-NEXT:    vst1.64 {d10, d11}, [r0:128]!
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #72
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #56
-; LE-I64-NEXT:    vmov.32 d21[1], r11
-; LE-I64-NEXT:    vmov.32 d20[1], r10
-; LE-I64-NEXT:    add r10, r2, #192
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-I64-NEXT:    vld1.64 {d16, d17}, [r1]
-; LE-I64-NEXT:    add r1, sp, #392
-; LE-I64-NEXT:    vmov.32 d18[1], r5
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #72
-; LE-I64-NEXT:    vst1.64 {d20, d21}, [r0:128]!
-; LE-I64-NEXT:    vld1.64 {d16, d17}, [r1]
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #104
-; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; LE-I64-NEXT:    add r0, sp, #312
-; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEXT:    add r0, sp, #328
-; LE-I64-NEXT:    vmov.32 d15[1], r8
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #120
-; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEXT:    add r0, sp, #424
-; LE-I64-NEXT:    vmov.32 d14[1], r4
-; LE-I64-NEXT:    vst1.64 {d12, d13}, [r10:128]!
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #176
-; LE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEXT:    vst1.64 {d14, d15}, [r10:128]!
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #192
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d17, d17
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #136
-; LE-I64-NEXT:    mov r9, r1
-; LE-I64-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d10, d10
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d8, d8
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d11, d11
-; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vorr d0, d9, d9
+; LE-I64-NEXT:    mov r7, r1
 ; LE-I64-NEXT:    vmov.32 d12[0], r0
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #160
+; LE-I64-NEXT:    add lr, sp, #96
 ; LE-I64-NEXT:    mov r5, r1
 ; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d10, d10
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d11, d11
-; LE-I64-NEXT:    mov r6, r1
-; LE-I64-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #192
-; LE-I64-NEXT:    mov r4, r1
-; LE-I64-NEXT:    vmov.32 d15[0], r0
 ; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
 ; LE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEXT:    add lr, sp, #192
-; LE-I64-NEXT:    mov r11, r1
-; LE-I64-NEXT:    vmov.32 d15[1], r4
-; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #176
-; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d9, d9
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d8, d8
-; LE-I64-NEXT:    vmov.32 d14[1], r6
-; LE-I64-NEXT:    add lr, sp, #136
-; LE-I64-NEXT:    mov r4, r1
-; LE-I64-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrint
 ; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    add lr, sp, #176
-; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    mov r10, r1
 ; LE-I64-NEXT:    vmov.32 d13[1], r5
 ; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    add lr, sp, #56
 ; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; LE-I64-NEXT:    vorr d0, d9, d9
 ; LE-I64-NEXT:    bl lrint
 ; LE-I64-NEXT:    vorr d0, d8, d8
-; LE-I64-NEXT:    vmov.32 d12[1], r8
-; LE-I64-NEXT:    add lr, sp, #88
-; LE-I64-NEXT:    mov r5, r1
-; LE-I64-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #192
-; LE-I64-NEXT:    str r1, [sp, #24] @ 4-byte Spill
-; LE-I64-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #40
-; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d11, d11
-; LE-I64-NEXT:    vmov.32 d9[1], r9
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d10, d10
-; LE-I64-NEXT:    vmov.32 d8[1], r11
-; LE-I64-NEXT:    add lr, sp, #192
-; LE-I64-NEXT:    mov r6, r1
-; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    add lr, sp, #160
-; LE-I64-NEXT:    str r1, [sp, #40] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.32 d12[1], r7
+; LE-I64-NEXT:    add lr, sp, #96
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
 ; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #176
-; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #56
-; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d9, d9
-; LE-I64-NEXT:    vmov.32 d11[1], r4
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d8, d8
-; LE-I64-NEXT:    vmov.32 d10[1], r7
-; LE-I64-NEXT:    add lr, sp, #176
-; LE-I64-NEXT:    mov r8, r1
-; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    vmov.32 d15[1], r4
 ; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #144
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d17, d17
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vmov.32 d12[0], r0
 ; LE-I64-NEXT:    add lr, sp, #8
-; LE-I64-NEXT:    mov r11, r1
-; LE-I64-NEXT:    vmov.32 d15[1], r5
-; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #72
-; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d9, d9
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
-; LE-I64-NEXT:    vorr d0, d8, d8
+; LE-I64-NEXT:    vmov.32 d14[1], r6
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; LE-I64-NEXT:    add lr, sp, #56
-; LE-I64-NEXT:    mov r9, r1
-; LE-I64-NEXT:    vmov.32 d14[1], r0
 ; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #80
+; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d11, d11
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    add lr, sp, #72
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    vmov.32 d15[0], r0
 ; LE-I64-NEXT:    add lr, sp, #160
-; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #104
+; LE-I64-NEXT:    vorr d0, d10, d10
+; LE-I64-NEXT:    ldr r0, [sp, #72] @ 4-byte Reload
 ; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d9, d9
-; LE-I64-NEXT:    vmov.32 d13[1], r6
-; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
-; LE-I64-NEXT:    vorr d0, d8, d8
-; LE-I64-NEXT:    add lr, sp, #160
 ; LE-I64-NEXT:    mov r6, r1
-; LE-I64-NEXT:    vmov.32 d12[1], r0
-; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I64-NEXT:    vmov.32 d9[1], r0
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; LE-I64-NEXT:    add lr, sp, #160
 ; LE-I64-NEXT:    mov r4, r1
-; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d8[1], r0
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
 ; LE-I64-NEXT:    add lr, sp, #120
-; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEXT:    vorr d0, d9, d9
-; LE-I64-NEXT:    vmov.32 d13[1], r8
+; LE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
+; LE-I64-NEXT:    vorr d0, d11, d11
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    vorr d0, d8, d8
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vorr d0, d10, d10
+; LE-I64-NEXT:    ldr r0, [sp, #116] @ 4-byte Reload
+; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; LE-I64-NEXT:    mov r5, r1
-; LE-I64-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEXT:    vmov.32 d12[1], r11
+; LE-I64-NEXT:    vmov.32 d9[1], r0
+; LE-I64-NEXT:    bl lrint
+; LE-I64-NEXT:    add lr, sp, #144
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; LE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; LE-I64-NEXT:    vmov.32 d8[1], r10
 ; LE-I64-NEXT:    bl lrint
-; LE-I64-NEXT:    add lr, sp, #72
-; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    vmov.32 d15[1], r6
+; LE-I64-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #24
 ; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
 ; LE-I64-NEXT:    add lr, sp, #160
-; LE-I64-NEXT:    vmov.32 d17[1], r9
-; LE-I64-NEXT:    vmov.32 d16[1], r7
-; LE-I64-NEXT:    vst1.64 {d12, d13}, [r10:128]!
-; LE-I64-NEXT:    vorr q9, q8, q8
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #136
-; LE-I64-NEXT:    vmov.32 d15[1], r5
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r10:128]
-; LE-I64-NEXT:    vmov.32 d14[1], r1
-; LE-I64-NEXT:    ldr r1, [sp, #156] @ 4-byte Reload
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add r0, r1, #128
+; LE-I64-NEXT:    vmov.32 d20[0], r0
+; LE-I64-NEXT:    vmov.32 d21[1], r8
+; LE-I64-NEXT:    vmov.32 d20[1], r1
+; LE-I64-NEXT:    ldr r1, [sp, #140] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d13[1], r5
+; LE-I64-NEXT:    mov r0, r1
+; LE-I64-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
 ; LE-I64-NEXT:    add lr, sp, #56
-; LE-I64-NEXT:    vmov.32 d11[1], r6
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT:    vmov.32 d14[1], r4
 ; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #176
-; LE-I64-NEXT:    vmov.32 d10[1], r4
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #192
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #96
+; LE-I64-NEXT:    vmov.32 d12[1], r7
+; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT:    vmov.32 d17[1], r9
+; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]
 ; LE-I64-NEXT:    add r0, r1, #64
-; LE-I64-NEXT:    vst1.64 {d10, d11}, [r0:128]!
 ; LE-I64-NEXT:    vst1.64 {d14, d15}, [r0:128]!
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #88
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vst1.64 {d12, d13}, [r0:128]!
+; LE-I64-NEXT:    vmov.32 d16[1], r11
+; LE-I64-NEXT:    vst1.64 {d20, d21}, [r0:128]!
 ; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-I64-NEXT:    add sp, sp, #208
+; LE-I64-NEXT:    add sp, sp, #176
 ; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-I64-NEXT:    add sp, sp, #4
 ; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
-; LE-I32-NEON-LABEL: lrint_v32f64:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r4, r5, r6, lr}
-; LE-I32-NEON-NEXT:    push {r4, r5, r6, lr}
-; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    .pad #160
-; LE-I32-NEON-NEXT:    sub sp, sp, #160
-; LE-I32-NEON-NEXT:    add lr, sp, #96
-; LE-I32-NEON-NEXT:    mov r4, r0
-; LE-I32-NEON-NEXT:    add r0, sp, #304
-; LE-I32-NEON-NEXT:    vorr q6, q3, q3
-; LE-I32-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    vorr q5, q1, q1
-; LE-I32-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    vorr d0, d4, d4
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    add r0, sp, #352
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    add r0, sp, #272
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #112
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    add r0, sp, #288
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #80
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    add r0, sp, #336
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #144
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    add r0, sp, #256
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #128
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    add r0, sp, #320
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d12, d12
-; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d10, d10
-; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d13, d13
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d11, d11
-; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #96
-; LE-I32-NEON-NEXT:    vorr q5, q4, q4
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d17, d17
-; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vmov.32 d14[1], r0
-; LE-I32-NEON-NEXT:    add lr, sp, #96
-; LE-I32-NEON-NEXT:    add r0, sp, #416
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d8, d8
-; LE-I32-NEON-NEXT:    vstmia sp, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr q6, q5, q5
-; LE-I32-NEON-NEXT:    vorr d0, d14, d14
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d9, d9
-; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d15, d15
-; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEON-NEXT:    add r0, sp, #400
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    vorr q6, q5, q5
-; LE-I32-NEON-NEXT:    vldmia sp, {d10, d11} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d10, d10
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d8, d8
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d11, d11
-; LE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d9, d9
-; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    add r0, sp, #384
-; LE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d10, d10
-; LE-I32-NEON-NEXT:    vstmia sp, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d8, d8
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d11, d11
-; LE-I32-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d9, d9
-; LE-I32-NEON-NEXT:    vmov.32 d14[1], r0
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr q7, q6, q6
-; LE-I32-NEON-NEXT:    vldmia sp, {d10, d11} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d10, d10
-; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEON-NEXT:    add r0, sp, #368
-; LE-I32-NEON-NEXT:    vld1.64 {d12, d13}, [r0]
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #144
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d11, d11
-; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d12, d12
-; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #144
-; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d17, d17
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vmov.32 d14[1], r0
-; LE-I32-NEON-NEXT:    add r0, sp, #240
-; LE-I32-NEON-NEXT:    vorr d0, d13, d13
-; LE-I32-NEON-NEXT:    add lr, sp, #144
-; LE-I32-NEON-NEXT:    vld1.64 {d10, d11}, [r0]
-; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    vstmia sp, {d10, d11} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d10, d10
-; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #80
-; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d12, d12
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #112
-; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d14, d14
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d13, d13
-; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #128
-; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d12, d12
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d15, d15
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vorr d0, d13, d13
-; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vorr d0, d17, d17
-; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEON-NEXT:    bl lrint
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEON-NEXT:    mov r0, r4
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    add lr, sp, #96
-; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEON-NEXT:    vst1.32 {d8, d9}, [r0:128]!
-; LE-I32-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]
-; LE-I32-NEON-NEXT:    add r0, r4, #64
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    add lr, sp, #144
-; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-I32-NEON-NEXT:    add sp, sp, #160
-; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    pop {r4, r5, r6, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v32f64:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEON-NEXT:    .pad #4
-; LE-I64-NEON-NEXT:    sub sp, sp, #4
-; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    .pad #208
-; LE-I64-NEON-NEXT:    sub sp, sp, #208
-; LE-I64-NEON-NEXT:    add lr, sp, #72
-; LE-I64-NEON-NEXT:    str r0, [sp, #156] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    add r0, sp, #456
-; LE-I64-NEON-NEXT:    vorr q4, q0, q0
-; LE-I64-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #56
-; LE-I64-NEON-NEXT:    vorr d0, d7, d7
-; LE-I64-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #104
-; LE-I64-NEON-NEXT:    vorr q5, q2, q2
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEON-NEXT:    add r0, sp, #344
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #192
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEON-NEXT:    add r0, sp, #376
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #160
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEON-NEXT:    add r0, sp, #360
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #136
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEON-NEXT:    add r0, sp, #440
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #176
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d10, d10
-; LE-I64-NEON-NEXT:    str r1, [sp, #120] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d11, d11
-; LE-I64-NEON-NEXT:    mov r10, r1
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d8, d8
-; LE-I64-NEON-NEXT:    add lr, sp, #88
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    mov r11, r1
-; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d9, d9
-; LE-I64-NEON-NEXT:    mov r9, r1
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #40
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #56
-; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d10, d10
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d11, d11
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #72
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #56
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r7
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #176
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d17, d17
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r4
-; LE-I64-NEON-NEXT:    add lr, sp, #72
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #104
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d9, d9
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #40
-; LE-I64-NEON-NEXT:    vorr d0, d8, d8
-; LE-I64-NEON-NEXT:    mov r8, r1
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r6
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #176
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r9
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #88
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #120] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #56
-; LE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #24
-; LE-I64-NEON-NEXT:    vmov.32 d19[1], r0
-; LE-I64-NEON-NEXT:    add r0, sp, #408
-; LE-I64-NEON-NEXT:    ldr r2, [sp, #156] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r7
-; LE-I64-NEON-NEXT:    mov r0, r2
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r1
-; LE-I64-NEON-NEXT:    add r1, sp, #488
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #40
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r1]
-; LE-I64-NEON-NEXT:    add r1, sp, #472
-; LE-I64-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #72
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #56
-; LE-I64-NEON-NEXT:    vmov.32 d21[1], r11
-; LE-I64-NEON-NEXT:    vmov.32 d20[1], r10
-; LE-I64-NEON-NEXT:    add r10, r2, #192
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r1]
-; LE-I64-NEON-NEXT:    add r1, sp, #392
-; LE-I64-NEON-NEXT:    vmov.32 d18[1], r5
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #72
-; LE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r0:128]!
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r1]
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #104
-; LE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; LE-I64-NEON-NEXT:    add r0, sp, #312
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEON-NEXT:    add r0, sp, #328
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r8
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #120
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEON-NEXT:    add r0, sp, #424
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r4
-; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r10:128]!
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #176
-; LE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r10:128]!
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #192
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d17, d17
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #136
-; LE-I64-NEON-NEXT:    mov r9, r1
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d10, d10
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d11, d11
-; LE-I64-NEON-NEXT:    mov r8, r1
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #160
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d10, d10
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d11, d11
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #192
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #192
-; LE-I64-NEON-NEXT:    mov r11, r1
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r4
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #176
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d9, d9
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d8, d8
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r6
-; LE-I64-NEON-NEXT:    add lr, sp, #136
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #176
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r5
-; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #24
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d9, d9
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d8, d8
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r8
-; LE-I64-NEON-NEXT:    add lr, sp, #88
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #192
-; LE-I64-NEON-NEXT:    str r1, [sp, #24] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #40
-; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d11, d11
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r9
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d10, d10
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r11
-; LE-I64-NEON-NEXT:    add lr, sp, #192
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #160
-; LE-I64-NEON-NEXT:    str r1, [sp, #40] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #176
-; LE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #56
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d9, d9
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d8, d8
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r7
-; LE-I64-NEON-NEXT:    add lr, sp, #176
-; LE-I64-NEON-NEXT:    mov r8, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #8
-; LE-I64-NEON-NEXT:    mov r11, r1
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r5
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #72
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d9, d9
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d8, d8
-; LE-I64-NEON-NEXT:    add lr, sp, #56
-; LE-I64-NEON-NEXT:    mov r9, r1
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    add lr, sp, #72
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #160
-; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #104
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d9, d9
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r6
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #40] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d8, d8
-; LE-I64-NEON-NEXT:    add lr, sp, #160
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r0
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #8
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #120
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vorr d0, d9, d9
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r8
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    vorr d0, d8, d8
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r11
-; LE-I64-NEON-NEXT:    bl lrint
-; LE-I64-NEON-NEXT:    add lr, sp, #72
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #160
-; LE-I64-NEON-NEXT:    vmov.32 d17[1], r9
-; LE-I64-NEON-NEXT:    vmov.32 d16[1], r7
-; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r10:128]!
-; LE-I64-NEON-NEXT:    vorr q9, q8, q8
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #136
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r5
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r1
-; LE-I64-NEON-NEXT:    ldr r1, [sp, #156] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add r0, r1, #128
-; LE-I64-NEON-NEXT:    add lr, sp, #56
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r6
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #176
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r4
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #192
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-I64-NEON-NEXT:    add r0, r1, #64
-; LE-I64-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #88
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-I64-NEON-NEXT:    add sp, sp, #208
-; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    add sp, sp, #4
-; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-I32-LABEL: lrint_v32f64:
+; BE-I32-LABEL: lrint_v16f64:
 ; BE-I32:       @ %bb.0:
 ; BE-I32-NEXT:    .save {r4, r5, r6, lr}
 ; BE-I32-NEXT:    push {r4, r5, r6, lr}
 ; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEXT:    .pad #176
-; BE-I32-NEXT:    sub sp, sp, #176
-; BE-I32-NEXT:    add lr, sp, #128
-; BE-I32-NEXT:    mov r4, r0
-; BE-I32-NEXT:    add r0, sp, #336
+; BE-I32-NEXT:    .pad #128
+; BE-I32-NEXT:    sub sp, sp, #128
+; BE-I32-NEXT:    add lr, sp, #64
+; BE-I32-NEXT:    add r0, sp, #240
+; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
+; BE-I32-NEXT:    add r0, sp, #224
 ; BE-I32-NEXT:    vorr q6, q3, q3
 ; BE-I32-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #80
+; BE-I32-NEXT:    add lr, sp, #16
 ; BE-I32-NEXT:    vorr q5, q1, q1
-; BE-I32-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I32-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
 ; BE-I32-NEXT:    add lr, sp, #32
-; BE-I32-NEXT:    vorr d0, d4, d4
-; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEXT:    add r0, sp, #320
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #160
-; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEXT:    add r0, sp, #432
+; BE-I32-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #80
+; BE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
 ; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-I32-NEXT:    add lr, sp, #112
 ; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEXT:    add r0, sp, #288
+; BE-I32-NEXT:    add r0, sp, #256
 ; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-I32-NEXT:    add lr, sp, #96
 ; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEXT:    add r0, sp, #368
+; BE-I32-NEXT:    add r0, sp, #208
 ; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-I32-NEXT:    add lr, sp, #48
-; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEXT:    add r0, sp, #416
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #144
-; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEXT:    add r0, sp, #400
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #64
-; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d12, d12
-; BE-I32-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEXT:    vld1.64 {d14, d15}, [r0]
+; BE-I32-NEXT:    vstmia sp, {d6, d7} @ 16-byte Spill
+; BE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
 ; BE-I32-NEXT:    bl lrint
 ; BE-I32-NEXT:    vorr d0, d10, d10
-; BE-I32-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEXT:    vmov.32 d8[0], r0
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d13, d13
+; BE-I32-NEXT:    vorr d0, d12, d12
 ; BE-I32-NEXT:    vmov.32 d9[0], r0
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d11, d11
-; BE-I32-NEXT:    vmov.32 d15[1], r0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #128
-; BE-I32-NEXT:    vorr q5, q4, q4
-; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d17, d17
-; BE-I32-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vmov.32 d14[1], r0
-; BE-I32-NEXT:    add lr, sp, #128
-; BE-I32-NEXT:    add r0, sp, #384
-; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #16
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #80
-; BE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d8, d8
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #64
-; BE-I32-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
 ; BE-I32-NEXT:    vorr d0, d14, d14
+; BE-I32-NEXT:    vmov.32 d11[0], r0
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d9, d9
-; BE-I32-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d15, d15
-; BE-I32-NEXT:    vmov.32 d10[1], r0
 ; BE-I32-NEXT:    add lr, sp, #80
-; BE-I32-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #16
-; BE-I32-NEXT:    vmov.32 d13[1], r0
-; BE-I32-NEXT:    add r0, sp, #272
-; BE-I32-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d10, d10
-; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEXT:    vstmia sp, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #32
+; BE-I32-NEXT:    add lr, sp, #64
 ; BE-I32-NEXT:    vmov.32 d12[0], r0
-; BE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d8, d8
+; BE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d11, d11
-; BE-I32-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEXT:    add lr, sp, #112
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
+; BE-I32-NEXT:    vmov.32 d14[0], r4
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d9, d9
-; BE-I32-NEXT:    vmov.32 d12[1], r0
-; BE-I32-NEXT:    add lr, sp, #64
-; BE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT:    vldmia sp, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
+; BE-I32-NEXT:    vmov.32 d15[0], r0
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vldmia sp, {d10, d11} @ 16-byte Reload
-; BE-I32-NEXT:    add lr, sp, #16
-; BE-I32-NEXT:    vmov.32 d15[1], r0
-; BE-I32-NEXT:    add r0, sp, #256
-; BE-I32-NEXT:    vorr d0, d10, d10
-; BE-I32-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    add lr, sp, #96
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I32-NEXT:    @ kill: def $d0 killed $d0 killed $q0
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #160
+; BE-I32-NEXT:    add lr, sp, #64
 ; BE-I32-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d8, d8
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d11, d11
-; BE-I32-NEXT:    vmov.32 d14[0], r0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d9, d9
-; BE-I32-NEXT:    vmov.32 d13[1], r0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vmov.32 d14[1], r0
-; BE-I32-NEXT:    add lr, sp, #32
-; BE-I32-NEXT:    add r0, sp, #304
-; BE-I32-NEXT:    vld1.64 {d10, d11}, [r0]
-; BE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #16
-; BE-I32-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d14, d14
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #48
-; BE-I32-NEXT:    vorr q4, q6, q6
-; BE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d12, d12
-; BE-I32-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d15, d15
-; BE-I32-NEXT:    add lr, sp, #160
-; BE-I32-NEXT:    vmov.32 d17[0], r0
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d10, d10
-; BE-I32-NEXT:    vmov.32 d8[1], r0
 ; BE-I32-NEXT:    add lr, sp, #16
-; BE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d13, d13
-; BE-I32-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #160
-; BE-I32-NEXT:    vorr d0, d11, d11
-; BE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I32-NEXT:    add lr, sp, #48
-; BE-I32-NEXT:    vmov.32 d13[1], r0
-; BE-I32-NEXT:    add r0, sp, #352
-; BE-I32-NEXT:    vld1.64 {d14, d15}, [r0]
-; BE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d14, d14
+; BE-I32-NEXT:    add lr, sp, #32
 ; BE-I32-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vmov.32 d12[0], r0
-; BE-I32-NEXT:    add lr, sp, #160
-; BE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
 ; BE-I32-NEXT:    add lr, sp, #96
-; BE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d12, d12
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #112
-; BE-I32-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d14, d14
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d13, d13
-; BE-I32-NEXT:    vmov.32 d11[0], r0
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #144
 ; BE-I32-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I32-NEXT:    vorr d0, d12, d12
-; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d15, d15
-; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    vorr d0, d13, d13
-; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    add lr, sp, #112
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
 ; BE-I32-NEXT:    bl lrint
 ; BE-I32-NEXT:    add lr, sp, #48
-; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vmov.32 d15[1], r0
 ; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
 ; BE-I32-NEXT:    vorr d0, d17, d17
 ; BE-I32-NEXT:    bl lrint
-; BE-I32-NEXT:    add lr, sp, #160
-; BE-I32-NEXT:    vrev64.32 q9, q4
-; BE-I32-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
 ; BE-I32-NEXT:    add lr, sp, #80
-; BE-I32-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; BE-I32-NEXT:    add lr, sp, #128
-; BE-I32-NEXT:    vmov.32 d22[1], r0
-; BE-I32-NEXT:    mov r0, r4
-; BE-I32-NEXT:    vst1.32 {d20, d21}, [r0:128]!
-; BE-I32-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; BE-I32-NEXT:    add lr, sp, #16
-; BE-I32-NEXT:    vrev64.32 q8, q5
-; BE-I32-NEXT:    vst1.32 {d20, d21}, [r0:128]!
-; BE-I32-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; BE-I32-NEXT:    add lr, sp, #32
-; BE-I32-NEXT:    vst1.32 {d20, d21}, [r0:128]!
-; BE-I32-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; BE-I32-NEXT:    add r0, r4, #64
-; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEXT:    add lr, sp, #64
-; BE-I32-NEXT:    vst1.32 {d18, d19}, [r0:128]!
-; BE-I32-NEXT:    vst1.32 {d22, d23}, [r0:128]!
-; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEXT:    vst1.32 {d18, d19}, [r0:128]!
-; BE-I32-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-I32-NEXT:    add sp, sp, #176
+; BE-I32-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I32-NEXT:    vorr d0, d17, d17
+; BE-I32-NEXT:    bl lrint
+; BE-I32-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q4
+; BE-I32-NEXT:    vrev64.32 q1, q5
+; BE-I32-NEXT:    vrev64.32 q2, q7
+; BE-I32-NEXT:    vrev64.32 q3, q6
+; BE-I32-NEXT:    add sp, sp, #128
 ; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-I32-NEXT:    pop {r4, r5, r6, pc}
 ;
-; BE-I64-LABEL: lrint_v32f64:
+; BE-I64-LABEL: lrint_v16f64:
 ; BE-I64:       @ %bb.0:
 ; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -7453,902 +1877,183 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
 ; BE-I64-NEXT:    sub sp, sp, #4
 ; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEXT:    .pad #232
-; BE-I64-NEXT:    sub sp, sp, #232
-; BE-I64-NEXT:    add lr, sp, #184
-; BE-I64-NEXT:    str r0, [sp, #148] @ 4-byte Spill
-; BE-I64-NEXT:    add r0, sp, #416
-; BE-I64-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #168
-; BE-I64-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #152
-; BE-I64-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #128
-; BE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #200
-; BE-I64-NEXT:    vld1.64 {d18, d19}, [r0]
-; BE-I64-NEXT:    add r0, sp, #448
-; BE-I64-NEXT:    vorr d0, d19, d19
-; BE-I64-NEXT:    vld1.64 {d14, d15}, [r0]
-; BE-I64-NEXT:    add r0, sp, #336
-; BE-I64-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
+; BE-I64-NEXT:    .pad #168
+; BE-I64-NEXT:    sub sp, sp, #168
 ; BE-I64-NEXT:    add lr, sp, #64
-; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEXT:    add r0, sp, #400
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #8
-; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEXT:    add r0, sp, #352
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #24
-; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEXT:    add r0, sp, #368
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    str r0, [sp, #132] @ 4-byte Spill
+; BE-I64-NEXT:    add r0, sp, #304
+; BE-I64-NEXT:    vorr q4, q3, q3
+; BE-I64-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
 ; BE-I64-NEXT:    add lr, sp, #48
+; BE-I64-NEXT:    vorr d0, d1, d1
 ; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEXT:    add r0, sp, #384
+; BE-I64-NEXT:    add r0, sp, #320
+; BE-I64-NEXT:    vorr q6, q2, q2
 ; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #96
+; BE-I64-NEXT:    add lr, sp, #88
+; BE-I64-NEXT:    vorr q7, q1, q1
 ; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEXT:    add r0, sp, #512
+; BE-I64-NEXT:    add r0, sp, #272
 ; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-I64-NEXT:    add lr, sp, #112
 ; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEXT:    add r0, sp, #432
-; BE-I64-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-I64-NEXT:    add r0, sp, #288
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    vld1.64 {d16, d17}, [r0]
 ; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d8, d8
-; BE-I64-NEXT:    str r1, [sp, #80] @ 4-byte Spill
-; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    vorr d0, d14, d14
+; BE-I64-NEXT:    add lr, sp, #136
+; BE-I64-NEXT:    vmov.32 d17[0], r0
+; BE-I64-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d9, d9
-; BE-I64-NEXT:    str r1, [sp, #44] @ 4-byte Spill
+; BE-I64-NEXT:    vorr d0, d15, d15
+; BE-I64-NEXT:    str r1, [sp, #84] @ 4-byte Spill
 ; BE-I64-NEXT:    vmov.32 d10[0], r0
 ; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d14, d14
-; BE-I64-NEXT:    add lr, sp, #216
+; BE-I64-NEXT:    vorr d0, d12, d12
+; BE-I64-NEXT:    add lr, sp, #152
 ; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    str r1, [sp, #44] @ 4-byte Spill
 ; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
 ; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d15, d15
-; BE-I64-NEXT:    mov r8, r1
-; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    vorr d0, d13, d13
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
 ; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    add lr, sp, #64
+; BE-I64-NEXT:    vorr d0, d8, d8
 ; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEXT:    vorr d0, d10, d10
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d11, d11
-; BE-I64-NEXT:    mov r6, r1
-; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    vmov.32 d11[0], r0
 ; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    add lr, sp, #200
+; BE-I64-NEXT:    vorr d0, d9, d9
 ; BE-I64-NEXT:    mov r7, r1
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I64-NEXT:    bl lrint
 ; BE-I64-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEXT:    add lr, sp, #200
-; BE-I64-NEXT:    mov r5, r1
-; BE-I64-NEXT:    vmov.32 d15[1], r7
-; BE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #8
-; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEXT:    vorr d0, d11, d11
 ; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d10, d10
-; BE-I64-NEXT:    vmov.32 d14[1], r6
 ; BE-I64-NEXT:    add lr, sp, #64
-; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    mov r5, r1
 ; BE-I64-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    add lr, sp, #24
-; BE-I64-NEXT:    mov r11, r1
-; BE-I64-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; BE-I64-NEXT:    vorr d0, d15, d15
-; BE-I64-NEXT:    vmov.32 d9[1], r4
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d14, d14
-; BE-I64-NEXT:    vmov.32 d8[1], r8
-; BE-I64-NEXT:    add lr, sp, #8
-; BE-I64-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; BE-I64-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
+; BE-I64-NEXT:    @ kill: def $d0 killed $d0 killed $q0
 ; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    add lr, sp, #136
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vmov.32 d13[1], r5
+; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-I64-NEXT:    add lr, sp, #24
-; BE-I64-NEXT:    mov r8, r1
-; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #216
-; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #48
 ; BE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; BE-I64-NEXT:    vorr d0, d9, d9
-; BE-I64-NEXT:    vmov.32 d11[1], r9
 ; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
 ; BE-I64-NEXT:    vorr d0, d8, d8
-; BE-I64-NEXT:    add lr, sp, #216
-; BE-I64-NEXT:    mov r9, r1
-; BE-I64-NEXT:    vmov.32 d10[1], r0
-; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; BE-I64-NEXT:    vmov.32 d12[1], r7
+; BE-I64-NEXT:    add lr, sp, #64
+; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
 ; BE-I64-NEXT:    bl lrint
 ; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    add lr, sp, #48
-; BE-I64-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
-; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    mov r11, r1
+; BE-I64-NEXT:    vmov.32 d11[1], r4
 ; BE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #200
-; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #96
+; BE-I64-NEXT:    add lr, sp, #48
+; BE-I64-NEXT:    vorr q6, q5, q5
 ; BE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; BE-I64-NEXT:    vorr d0, d9, d9
-; BE-I64-NEXT:    vmov.32 d11[1], r0
 ; BE-I64-NEXT:    bl lrint
 ; BE-I64-NEXT:    vorr d0, d8, d8
-; BE-I64-NEXT:    vmov.32 d10[1], r5
-; BE-I64-NEXT:    add lr, sp, #200
-; BE-I64-NEXT:    mov r7, r1
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    add lr, sp, #112
-; BE-I64-NEXT:    vorr q4, q6, q6
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I64-NEXT:    vorr d0, d13, d13
-; BE-I64-NEXT:    vmov.32 d9[1], r10
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d12, d12
-; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d12[1], r6
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    mov r8, r1
 ; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    vmov.32 d8[1], r11
+; BE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
 ; BE-I64-NEXT:    bl lrint
 ; BE-I64-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEXT:    add lr, sp, #24
-; BE-I64-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #48
-; BE-I64-NEXT:    vmov.32 d17[1], r0
-; BE-I64-NEXT:    vmov.32 d16[1], r8
-; BE-I64-NEXT:    vorr q9, q8, q8
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #112
-; BE-I64-NEXT:    vmov.32 d17[1], r9
-; BE-I64-NEXT:    vmov.32 d16[1], r6
-; BE-I64-NEXT:    vorr q10, q8, q8
-; BE-I64-NEXT:    vrev64.32 q8, q4
-; BE-I64-NEXT:    vmov.32 d15[1], r7
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #200
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    vmov.32 d11[1], r5
-; BE-I64-NEXT:    vrev64.32 q8, q8
-; BE-I64-NEXT:    vmov.32 d14[1], r4
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #216
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    vmov.32 d10[1], r1
-; BE-I64-NEXT:    vrev64.32 q8, q8
-; BE-I64-NEXT:    vrev64.32 q6, q7
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #8
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #96
-; BE-I64-NEXT:    vrev64.32 q7, q5
-; BE-I64-NEXT:    vrev64.32 q8, q8
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #64
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #80
-; BE-I64-NEXT:    vrev64.32 q8, q8
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #64
-; BE-I64-NEXT:    vrev64.32 q8, q9
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
 ; BE-I64-NEXT:    add lr, sp, #48
-; BE-I64-NEXT:    vrev64.32 q8, q10
-; BE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEXT:    add lr, sp, #128
-; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEXT:    vorr d0, d11, d11
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d10, d10
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEXT:    ldr r6, [sp, #148] @ 4-byte Reload
+; BE-I64-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
 ; BE-I64-NEXT:    add lr, sp, #152
-; BE-I64-NEXT:    vmov.32 d9[1], r4
-; BE-I64-NEXT:    mov r5, r6
-; BE-I64-NEXT:    vmov.32 d8[1], r1
-; BE-I64-NEXT:    vrev64.32 q8, q4
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEXT:    vorr d0, d11, d11
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d10, d10
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEXT:    add lr, sp, #168
-; BE-I64-NEXT:    vmov.32 d9[1], r4
-; BE-I64-NEXT:    vmov.32 d8[1], r1
-; BE-I64-NEXT:    vrev64.32 q8, q4
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEXT:    vorr d0, d11, d11
+; BE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #88
+; BE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
+; BE-I64-NEXT:    vorr d0, d13, d13
+; BE-I64-NEXT:    vmov.32 d9[1], r0
 ; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d10, d10
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #84] @ 4-byte Reload
+; BE-I64-NEXT:    vorr d0, d12, d12
+; BE-I64-NEXT:    add lr, sp, #152
 ; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    vmov.32 d8[1], r0
+; BE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
 ; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEXT:    add lr, sp, #184
-; BE-I64-NEXT:    vmov.32 d9[1], r4
-; BE-I64-NEXT:    vmov.32 d8[1], r1
-; BE-I64-NEXT:    vrev64.32 q8, q4
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT:    add lr, sp, #136
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; BE-I64-NEXT:    mov r5, r1
 ; BE-I64-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEXT:    vorr d0, d11, d11
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d10, d10
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEXT:    add r0, sp, #464
-; BE-I64-NEXT:    vmov.32 d9[1], r4
-; BE-I64-NEXT:    vmov.32 d8[1], r1
-; BE-I64-NEXT:    vrev64.32 q8, q4
-; BE-I64-NEXT:    vld1.64 {d8, d9}, [r0]
-; BE-I64-NEXT:    vorr d0, d9, d9
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d8, d8
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEXT:    add r0, sp, #480
-; BE-I64-NEXT:    add r5, r6, #192
-; BE-I64-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEXT:    vmov.32 d10[1], r1
-; BE-I64-NEXT:    vld1.64 {d8, d9}, [r0]
-; BE-I64-NEXT:    vorr d0, d9, d9
-; BE-I64-NEXT:    vrev64.32 q8, q5
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vorr d0, d8, d8
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEXT:    add r0, sp, #496
-; BE-I64-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEXT:    vmov.32 d10[1], r1
-; BE-I64-NEXT:    vld1.64 {d8, d9}, [r0]
+; BE-I64-NEXT:    add lr, sp, #112
+; BE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; BE-I64-NEXT:    vorr d0, d9, d9
-; BE-I64-NEXT:    vrev64.32 q8, q5
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
+; BE-I64-NEXT:    vmov.32 d11[1], r0
 ; BE-I64-NEXT:    bl lrint
 ; BE-I64-NEXT:    vorr d0, d8, d8
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    vmov.32 d10[1], r9
 ; BE-I64-NEXT:    bl lrint
-; BE-I64-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEXT:    add lr, sp, #112
-; BE-I64-NEXT:    add r0, r6, #128
-; BE-I64-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEXT:    vmov.32 d10[1], r1
-; BE-I64-NEXT:    vrev64.32 q8, q5
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEXT:    vst1.64 {d14, d15}, [r5:128]
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #200
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #216
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #96
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #80
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-I64-NEXT:    add r0, r6, #64
-; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEXT:    add lr, sp, #64
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    add lr, sp, #8
+; BE-I64-NEXT:    vmov.32 d12[0], r0
 ; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
 ; BE-I64-NEXT:    add lr, sp, #48
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; BE-I64-NEXT:    vmov.32 d17[1], r10
+; BE-I64-NEXT:    vmov.32 d16[1], r11
+; BE-I64-NEXT:    vorr q12, q8, q8
 ; BE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #152
+; BE-I64-NEXT:    vmov.32 d17[1], r8
+; BE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #24
+; BE-I64-NEXT:    vmov.32 d13[1], r7
+; BE-I64-NEXT:    vmov.32 d16[1], r6
+; BE-I64-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; BE-I64-NEXT:    add lr, sp, #64
+; BE-I64-NEXT:    vorr q13, q8, q8
+; BE-I64-NEXT:    vmov.32 d12[1], r1
+; BE-I64-NEXT:    ldr r1, [sp, #132] @ 4-byte Reload
+; BE-I64-NEXT:    vrev64.32 q8, q5
+; BE-I64-NEXT:    mov r0, r1
+; BE-I64-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
+; BE-I64-NEXT:    vrev64.32 q9, q9
+; BE-I64-NEXT:    vrev64.32 q10, q10
 ; BE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEXT:    vst1.64 {d12, d13}, [r0:128]
-; BE-I64-NEXT:    add sp, sp, #232
+; BE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 q11, q11
+; BE-I64-NEXT:    vmov.32 d15[1], r4
+; BE-I64-NEXT:    vst1.64 {d20, d21}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 q15, q6
+; BE-I64-NEXT:    vmov.32 d14[1], r5
+; BE-I64-NEXT:    vrev64.32 q12, q12
+; BE-I64-NEXT:    vst1.64 {d22, d23}, [r0:128]
+; BE-I64-NEXT:    add r0, r1, #64
+; BE-I64-NEXT:    vrev64.32 q13, q13
+; BE-I64-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEXT:    vst1.64 {d24, d25}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 q14, q7
+; BE-I64-NEXT:    vst1.64 {d26, d27}, [r0:128]!
+; BE-I64-NEXT:    vst1.64 {d28, d29}, [r0:128]
+; BE-I64-NEXT:    add sp, sp, #168
 ; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-I64-NEXT:    add sp, sp, #4
 ; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v32f64:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r4, r5, r6, lr}
-; BE-I32-NEON-NEXT:    push {r4, r5, r6, lr}
-; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    .pad #176
-; BE-I32-NEON-NEXT:    sub sp, sp, #176
-; BE-I32-NEON-NEXT:    add lr, sp, #128
-; BE-I32-NEON-NEXT:    mov r4, r0
-; BE-I32-NEON-NEXT:    add r0, sp, #336
-; BE-I32-NEON-NEXT:    vorr q6, q3, q3
-; BE-I32-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #80
-; BE-I32-NEON-NEXT:    vorr q5, q1, q1
-; BE-I32-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #32
-; BE-I32-NEON-NEXT:    vorr d0, d4, d4
-; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEON-NEXT:    add r0, sp, #320
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #160
-; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEON-NEXT:    add r0, sp, #432
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #112
-; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEON-NEXT:    add r0, sp, #288
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #96
-; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEON-NEXT:    add r0, sp, #368
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #48
-; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEON-NEXT:    add r0, sp, #416
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #144
-; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEON-NEXT:    add r0, sp, #400
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #64
-; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d12, d12
-; BE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d10, d10
-; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d13, d13
-; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d11, d11
-; BE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #128
-; BE-I32-NEON-NEXT:    vorr q5, q4, q4
-; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d17, d17
-; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vmov.32 d14[1], r0
-; BE-I32-NEON-NEXT:    add lr, sp, #128
-; BE-I32-NEON-NEXT:    add r0, sp, #384
-; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #80
-; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d8, d8
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #64
-; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d14, d14
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d9, d9
-; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d15, d15
-; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEON-NEXT:    add lr, sp, #80
-; BE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; BE-I32-NEON-NEXT:    add r0, sp, #272
-; BE-I32-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d10, d10
-; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEON-NEXT:    vstmia sp, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #32
-; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d8, d8
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d11, d11
-; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d9, d9
-; BE-I32-NEON-NEXT:    vmov.32 d12[1], r0
-; BE-I32-NEON-NEXT:    add lr, sp, #64
-; BE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vldmia sp, {d10, d11} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; BE-I32-NEON-NEXT:    add r0, sp, #256
-; BE-I32-NEON-NEXT:    vorr d0, d10, d10
-; BE-I32-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #160
-; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d8, d8
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d11, d11
-; BE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d9, d9
-; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vmov.32 d14[1], r0
-; BE-I32-NEON-NEXT:    add lr, sp, #32
-; BE-I32-NEON-NEXT:    add r0, sp, #304
-; BE-I32-NEON-NEXT:    vld1.64 {d10, d11}, [r0]
-; BE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d14, d14
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #48
-; BE-I32-NEON-NEXT:    vorr q4, q6, q6
-; BE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d12, d12
-; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d15, d15
-; BE-I32-NEON-NEXT:    add lr, sp, #160
-; BE-I32-NEON-NEXT:    vmov.32 d17[0], r0
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d10, d10
-; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d13, d13
-; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #160
-; BE-I32-NEON-NEXT:    vorr d0, d11, d11
-; BE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    add lr, sp, #48
-; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; BE-I32-NEON-NEXT:    add r0, sp, #352
-; BE-I32-NEON-NEXT:    vld1.64 {d14, d15}, [r0]
-; BE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d14, d14
-; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I32-NEON-NEXT:    add lr, sp, #160
-; BE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #96
-; BE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d12, d12
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #112
-; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d14, d14
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d13, d13
-; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #144
-; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d12, d12
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d15, d15
-; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    vorr d0, d13, d13
-; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #48
-; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vorr d0, d17, d17
-; BE-I32-NEON-NEXT:    bl lrint
-; BE-I32-NEON-NEXT:    add lr, sp, #160
-; BE-I32-NEON-NEXT:    vrev64.32 q9, q4
-; BE-I32-NEON-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    add lr, sp, #80
-; BE-I32-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    add lr, sp, #128
-; BE-I32-NEON-NEXT:    vmov.32 d22[1], r0
-; BE-I32-NEON-NEXT:    mov r0, r4
-; BE-I32-NEON-NEXT:    vst1.32 {d20, d21}, [r0:128]!
-; BE-I32-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    add lr, sp, #16
-; BE-I32-NEON-NEXT:    vrev64.32 q8, q5
-; BE-I32-NEON-NEXT:    vst1.32 {d20, d21}, [r0:128]!
-; BE-I32-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    add lr, sp, #32
-; BE-I32-NEON-NEXT:    vst1.32 {d20, d21}, [r0:128]!
-; BE-I32-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; BE-I32-NEON-NEXT:    add r0, r4, #64
-; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    add lr, sp, #64
-; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r0:128]!
-; BE-I32-NEON-NEXT:    vst1.32 {d22, d23}, [r0:128]!
-; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r0:128]!
-; BE-I32-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-I32-NEON-NEXT:    add sp, sp, #176
-; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    pop {r4, r5, r6, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v32f64:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEON-NEXT:    .pad #4
-; BE-I64-NEON-NEXT:    sub sp, sp, #4
-; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    .pad #232
-; BE-I64-NEON-NEXT:    sub sp, sp, #232
-; BE-I64-NEON-NEXT:    add lr, sp, #184
-; BE-I64-NEON-NEXT:    str r0, [sp, #148] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    add r0, sp, #416
-; BE-I64-NEON-NEXT:    vstmia lr, {d6, d7} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #168
-; BE-I64-NEON-NEXT:    vstmia lr, {d4, d5} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #152
-; BE-I64-NEON-NEXT:    vstmia lr, {d2, d3} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #128
-; BE-I64-NEON-NEXT:    vstmia lr, {d0, d1} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #200
-; BE-I64-NEON-NEXT:    vld1.64 {d18, d19}, [r0]
-; BE-I64-NEON-NEXT:    add r0, sp, #448
-; BE-I64-NEON-NEXT:    vorr d0, d19, d19
-; BE-I64-NEON-NEXT:    vld1.64 {d14, d15}, [r0]
-; BE-I64-NEON-NEXT:    add r0, sp, #336
-; BE-I64-NEON-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #64
-; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEON-NEXT:    add r0, sp, #400
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #8
-; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEON-NEXT:    add r0, sp, #352
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #24
-; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEON-NEXT:    add r0, sp, #368
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #48
-; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEON-NEXT:    add r0, sp, #384
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #96
-; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEON-NEXT:    add r0, sp, #512
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #112
-; BE-I64-NEON-NEXT:    vld1.64 {d16, d17}, [r0]
-; BE-I64-NEON-NEXT:    add r0, sp, #432
-; BE-I64-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d8, d8
-; BE-I64-NEON-NEXT:    str r1, [sp, #80] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d9, d9
-; BE-I64-NEON-NEXT:    str r1, [sp, #44] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d14, d14
-; BE-I64-NEON-NEXT:    add lr, sp, #216
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    mov r9, r1
-; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d15, d15
-; BE-I64-NEON-NEXT:    mov r8, r1
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    add lr, sp, #64
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d10, d10
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d11, d11
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    add lr, sp, #200
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    vldmia lr, {d0, d1} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    @ kill: def $d0 killed $d0 killed $q0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #200
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r7
-; BE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #8
-; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d11, d11
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d10, d10
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r6
-; BE-I64-NEON-NEXT:    add lr, sp, #64
-; BE-I64-NEON-NEXT:    mov r10, r1
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    add lr, sp, #24
-; BE-I64-NEON-NEXT:    mov r11, r1
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d15, d15
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d14, d14
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r8
-; BE-I64-NEON-NEXT:    add lr, sp, #8
-; BE-I64-NEON-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #24
-; BE-I64-NEON-NEXT:    mov r8, r1
-; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #216
-; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #48
-; BE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d9, d9
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r9
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #44] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d8, d8
-; BE-I64-NEON-NEXT:    add lr, sp, #216
-; BE-I64-NEON-NEXT:    mov r9, r1
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #48
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #80] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #200
-; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #96
-; BE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d9, d9
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d8, d8
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r5
-; BE-I64-NEON-NEXT:    add lr, sp, #200
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    add lr, sp, #112
-; BE-I64-NEON-NEXT:    vorr q4, q6, q6
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d13, d13
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r10
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d12, d12
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r11
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #24
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #48
-; BE-I64-NEON-NEXT:    vmov.32 d17[1], r0
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r8
-; BE-I64-NEON-NEXT:    vorr q9, q8, q8
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #112
-; BE-I64-NEON-NEXT:    vmov.32 d17[1], r9
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r6
-; BE-I64-NEON-NEXT:    vorr q10, q8, q8
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q4
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r7
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #200
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r5
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q8
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r4
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #216
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q8
-; BE-I64-NEON-NEXT:    vrev64.32 q6, q7
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #8
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #96
-; BE-I64-NEON-NEXT:    vrev64.32 q7, q5
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q8
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #64
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #80
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q8
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #64
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q9
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #48
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q10
-; BE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I64-NEON-NEXT:    add lr, sp, #128
-; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d11, d11
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d10, d10
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    ldr r6, [sp, #148] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #152
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-I64-NEON-NEXT:    mov r5, r6
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q4
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d11, d11
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d10, d10
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #168
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q4
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d11, d11
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d10, d10
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #184
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q4
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d10, d11} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vorr d0, d11, d11
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d10, d10
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    add r0, sp, #464
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q4
-; BE-I64-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
-; BE-I64-NEON-NEXT:    vorr d0, d9, d9
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d8, d8
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    add r0, sp, #480
-; BE-I64-NEON-NEXT:    add r5, r6, #192
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-I64-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
-; BE-I64-NEON-NEXT:    vorr d0, d9, d9
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q5
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d8, d8
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    add r0, sp, #496
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-I64-NEON-NEXT:    vld1.64 {d8, d9}, [r0]
-; BE-I64-NEON-NEXT:    vorr d0, d9, d9
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q5
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vorr d0, d8, d8
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    bl lrint
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    add lr, sp, #112
-; BE-I64-NEON-NEXT:    add r0, r6, #128
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 q8, q5
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; BE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r5:128]
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #200
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #216
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #96
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #80
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; BE-I64-NEON-NEXT:    add r0, r6, #64
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #64
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    add lr, sp, #48
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]!
-; BE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]
-; BE-I64-NEON-NEXT:    add sp, sp, #232
-; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    add sp, sp, #4
-; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-  %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v16f64(<32 x double> %x)
-  ret <32 x iXLen> %a
+  %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double> %x)
+  ret <16 x iXLen> %a
 }
-declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>)
+declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f64(<16 x double>)
 
 define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
 ; LE-I32-LABEL: lrint_v1fp128:
@@ -8367,22 +2072,6 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
 ; LE-I64-NEXT:    vmov.32 d0[1], r1
 ; LE-I64-NEXT:    pop {r11, pc}
 ;
-; LE-I32-NEON-LABEL: lrint_v1fp128:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r11, lr}
-; LE-I32-NEON-NEXT:    push {r11, lr}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v1fp128:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r11, lr}
-; LE-I64-NEON-NEXT:    push {r11, lr}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    vmov.32 d0[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d0[1], r1
-; LE-I64-NEON-NEXT:    pop {r11, pc}
-;
 ; BE-I32-LABEL: lrint_v1fp128:
 ; BE-I32:       @ %bb.0:
 ; BE-I32-NEXT:    .save {r11, lr}
@@ -8399,23 +2088,6 @@ define <1 x iXLen> @lrint_v1fp128(<1 x fp128> %x) {
 ; BE-I64-NEXT:    vmov.32 d16[1], r1
 ; BE-I64-NEXT:    vrev64.32 d0, d16
 ; BE-I64-NEXT:    pop {r11, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v1fp128:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r11, lr}
-; BE-I32-NEON-NEXT:    push {r11, lr}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    pop {r11, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v1fp128:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r11, lr}
-; BE-I64-NEON-NEXT:    push {r11, lr}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 d0, d16
-; BE-I64-NEON-NEXT:    pop {r11, pc}
   %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1fp128(<1 x fp128> %x)
   ret <1 x iXLen> %a
 }
@@ -8470,54 +2142,6 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
 ; LE-I64-NEXT:    vpop {d8, d9}
 ; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, pc}
 ;
-; LE-I32-NEON-LABEL: lrint_v2fp128:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; LE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
-; LE-I32-NEON-NEXT:    mov r8, r3
-; LE-I32-NEON-NEXT:    add r3, sp, #24
-; LE-I32-NEON-NEXT:    mov r5, r2
-; LE-I32-NEON-NEXT:    mov r6, r1
-; LE-I32-NEON-NEXT:    mov r7, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    mov r4, r0
-; LE-I32-NEON-NEXT:    mov r0, r7
-; LE-I32-NEON-NEXT:    mov r1, r6
-; LE-I32-NEON-NEXT:    mov r2, r5
-; LE-I32-NEON-NEXT:    mov r3, r8
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    vmov.32 d0[0], r0
-; LE-I32-NEON-NEXT:    vmov.32 d0[1], r4
-; LE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v2fp128:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
-; LE-I64-NEON-NEXT:    .vsave {d8, d9}
-; LE-I64-NEON-NEXT:    vpush {d8, d9}
-; LE-I64-NEON-NEXT:    mov r8, r3
-; LE-I64-NEON-NEXT:    add r3, sp, #40
-; LE-I64-NEON-NEXT:    mov r5, r2
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    mov r7, r0
-; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r7
-; LE-I64-NEON-NEXT:    mov r1, r6
-; LE-I64-NEON-NEXT:    mov r2, r5
-; LE-I64-NEON-NEXT:    mov r3, r8
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r4
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-I64-NEON-NEXT:    vorr q0, q4, q4
-; LE-I64-NEON-NEXT:    vpop {d8, d9}
-; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
-;
 ; BE-I32-LABEL: lrint_v2fp128:
 ; BE-I32:       @ %bb.0:
 ; BE-I32-NEXT:    .save {r4, r5, r6, r7, r8, lr}
@@ -8567,56 +2191,6 @@ define <2 x iXLen> @lrint_v2fp128(<2 x fp128> %x) {
 ; BE-I64-NEXT:    vrev64.32 d0, d16
 ; BE-I64-NEXT:    vpop {d8}
 ; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v2fp128:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; BE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
-; BE-I32-NEON-NEXT:    mov r8, r3
-; BE-I32-NEON-NEXT:    add r3, sp, #24
-; BE-I32-NEON-NEXT:    mov r5, r2
-; BE-I32-NEON-NEXT:    mov r6, r1
-; BE-I32-NEON-NEXT:    mov r7, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    mov r4, r0
-; BE-I32-NEON-NEXT:    mov r0, r7
-; BE-I32-NEON-NEXT:    mov r1, r6
-; BE-I32-NEON-NEXT:    mov r2, r5
-; BE-I32-NEON-NEXT:    mov r3, r8
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-I32-NEON-NEXT:    vmov.32 d16[1], r4
-; BE-I32-NEON-NEXT:    vrev64.32 d0, d16
-; BE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v2fp128:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, lr}
-; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, lr}
-; BE-I64-NEON-NEXT:    .vsave {d8}
-; BE-I64-NEON-NEXT:    vpush {d8}
-; BE-I64-NEON-NEXT:    mov r8, r3
-; BE-I64-NEON-NEXT:    add r3, sp, #32
-; BE-I64-NEON-NEXT:    mov r5, r2
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    mov r7, r0
-; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r7
-; BE-I64-NEON-NEXT:    mov r1, r6
-; BE-I64-NEON-NEXT:    mov r2, r5
-; BE-I64-NEON-NEXT:    mov r3, r8
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 d1, d8
-; BE-I64-NEON-NEXT:    vrev64.32 d0, d16
-; BE-I64-NEON-NEXT:    vpop {d8}
-; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, pc}
   %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2fp128(<2 x fp128> %x)
   ret <2 x iXLen> %a
 }
@@ -8696,991 +2270,154 @@ define <4 x iXLen> @lrint_v4fp128(<4 x fp128> %x) {
 ; LE-I64-NEXT:    vpop {d8, d9, d10, d11}
 ; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 ;
-; LE-I32-NEON-LABEL: lrint_v4fp128:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r4, lr}
-; LE-I32-NEON-NEXT:    push {r4, lr}
-; LE-I32-NEON-NEXT:    .vsave {d8, d9}
-; LE-I32-NEON-NEXT:    vpush {d8, d9}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #60
-; LE-I32-NEON-NEXT:    ldr r12, [sp, #56]
-; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEON-NEXT:    mov r0, r12
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #40
-; LE-I32-NEON-NEXT:    mov r4, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #28
-; LE-I32-NEON-NEXT:    ldr r12, [sp, #24]
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEON-NEXT:    mov r0, r12
-; LE-I32-NEON-NEXT:    vmov.32 d9[1], r4
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEON-NEXT:    vorr q0, q4, q4
-; LE-I32-NEON-NEXT:    vpop {d8, d9}
-; LE-I32-NEON-NEXT:    pop {r4, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v4fp128:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11}
-; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11}
-; LE-I64-NEON-NEXT:    mov r5, r3
-; LE-I64-NEON-NEXT:    add r3, sp, #96
-; LE-I64-NEON-NEXT:    mov r7, r2
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    mov r4, r0
-; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    mov r9, r1
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r4
-; LE-I64-NEON-NEXT:    mov r1, r6
-; LE-I64-NEON-NEXT:    mov r2, r7
-; LE-I64-NEON-NEXT:    mov r3, r5
-; LE-I64-NEON-NEXT:    ldr r8, [sp, #80]
-; LE-I64-NEON-NEXT:    ldr r10, [sp, #64]
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #68
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r10
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #84
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r8
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r9
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r5
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-I64-NEON-NEXT:    vorr q0, q5, q5
-; LE-I64-NEON-NEXT:    vorr q1, q4, q4
-; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11}
-; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; BE-I32-LABEL: lrint_v4fp128:
-; BE-I32:       @ %bb.0:
-; BE-I32-NEXT:    .save {r4, lr}
-; BE-I32-NEXT:    push {r4, lr}
-; BE-I32-NEXT:    .vsave {d8, d9}
-; BE-I32-NEXT:    vpush {d8, d9}
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #60
-; BE-I32-NEXT:    ldr r12, [sp, #56]
-; BE-I32-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEXT:    mov r0, r12
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #40
-; BE-I32-NEXT:    mov r4, r0
-; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #28
-; BE-I32-NEXT:    ldr r12, [sp, #24]
-; BE-I32-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEXT:    mov r0, r12
-; BE-I32-NEXT:    vmov.32 d9[1], r4
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEXT:    vrev64.32 q0, q4
-; BE-I32-NEXT:    vpop {d8, d9}
-; BE-I32-NEXT:    pop {r4, pc}
-;
-; BE-I64-LABEL: lrint_v4fp128:
-; BE-I64:       @ %bb.0:
-; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-I64-NEXT:    .vsave {d8, d9, d10}
-; BE-I64-NEXT:    vpush {d8, d9, d10}
-; BE-I64-NEXT:    mov r5, r3
-; BE-I64-NEXT:    add r3, sp, #88
-; BE-I64-NEXT:    mov r7, r2
-; BE-I64-NEXT:    mov r6, r1
-; BE-I64-NEXT:    mov r4, r0
-; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    mov r9, r1
-; BE-I64-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEXT:    mov r0, r4
-; BE-I64-NEXT:    mov r1, r6
-; BE-I64-NEXT:    mov r2, r7
-; BE-I64-NEXT:    mov r3, r5
-; BE-I64-NEXT:    ldr r8, [sp, #72]
-; BE-I64-NEXT:    ldr r10, [sp, #56]
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #60
-; BE-I64-NEXT:    mov r5, r1
-; BE-I64-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEXT:    mov r0, r10
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #76
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEXT:    mov r0, r8
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEXT:    vmov.32 d10[1], r4
-; BE-I64-NEXT:    vmov.32 d8[1], r9
-; BE-I64-NEXT:    vmov.32 d9[1], r5
-; BE-I64-NEXT:    vmov.32 d16[1], r1
-; BE-I64-NEXT:    vrev64.32 d1, d10
-; BE-I64-NEXT:    vrev64.32 d3, d8
-; BE-I64-NEXT:    vrev64.32 d0, d9
-; BE-I64-NEXT:    vrev64.32 d2, d16
-; BE-I64-NEXT:    vpop {d8, d9, d10}
-; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v4fp128:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r4, lr}
-; BE-I32-NEON-NEXT:    push {r4, lr}
-; BE-I32-NEON-NEXT:    .vsave {d8, d9}
-; BE-I32-NEON-NEXT:    vpush {d8, d9}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #60
-; BE-I32-NEON-NEXT:    ldr r12, [sp, #56]
-; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEON-NEXT:    mov r0, r12
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #40
-; BE-I32-NEON-NEXT:    mov r4, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #28
-; BE-I32-NEON-NEXT:    ldr r12, [sp, #24]
-; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEON-NEXT:    mov r0, r12
-; BE-I32-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEON-NEXT:    vrev64.32 q0, q4
-; BE-I32-NEON-NEXT:    vpop {d8, d9}
-; BE-I32-NEON-NEXT:    pop {r4, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v4fp128:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10}
-; BE-I64-NEON-NEXT:    vpush {d8, d9, d10}
-; BE-I64-NEON-NEXT:    mov r5, r3
-; BE-I64-NEON-NEXT:    add r3, sp, #88
-; BE-I64-NEON-NEXT:    mov r7, r2
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    mov r4, r0
-; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    mov r9, r1
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r4
-; BE-I64-NEON-NEXT:    mov r1, r6
-; BE-I64-NEON-NEXT:    mov r2, r7
-; BE-I64-NEON-NEXT:    mov r3, r5
-; BE-I64-NEON-NEXT:    ldr r8, [sp, #72]
-; BE-I64-NEON-NEXT:    ldr r10, [sp, #56]
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #60
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r10
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #76
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r8
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r9
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r5
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 d1, d10
-; BE-I64-NEON-NEXT:    vrev64.32 d3, d8
-; BE-I64-NEON-NEXT:    vrev64.32 d0, d9
-; BE-I64-NEON-NEXT:    vrev64.32 d2, d16
-; BE-I64-NEON-NEXT:    vpop {d8, d9, d10}
-; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x)
-  ret <4 x iXLen> %a
-}
-declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>)
-
-define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
-; LE-I32-LABEL: lrint_v8fp128:
-; LE-I32:       @ %bb.0:
-; LE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-I32-NEXT:    .vsave {d8, d9, d10, d11}
-; LE-I32-NEXT:    vpush {d8, d9, d10, d11}
-; LE-I32-NEXT:    mov r6, r3
-; LE-I32-NEXT:    add r3, sp, #112
-; LE-I32-NEXT:    mov r7, r2
-; LE-I32-NEXT:    mov r4, r1
-; LE-I32-NEXT:    mov r5, r0
-; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEXT:    mov r0, r5
-; LE-I32-NEXT:    mov r1, r4
-; LE-I32-NEXT:    mov r2, r7
-; LE-I32-NEXT:    mov r3, r6
-; LE-I32-NEXT:    ldr r8, [sp, #160]
-; LE-I32-NEXT:    ldr r9, [sp, #64]
-; LE-I32-NEXT:    ldr r10, [sp, #80]
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #84
-; LE-I32-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEXT:    mov r0, r10
-; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r6, [sp, #96]
-; LE-I32-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEXT:    ldr r1, [sp, #100]
-; LE-I32-NEXT:    ldr r2, [sp, #104]
-; LE-I32-NEXT:    ldr r3, [sp, #108]
-; LE-I32-NEXT:    mov r0, r6
-; LE-I32-NEXT:    ldr r4, [sp, #68]
-; LE-I32-NEXT:    ldr r5, [sp, #72]
-; LE-I32-NEXT:    ldr r10, [sp, #164]
-; LE-I32-NEXT:    ldr r7, [sp, #168]
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r3, [sp, #76]
-; LE-I32-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEXT:    mov r0, r9
-; LE-I32-NEXT:    mov r1, r4
-; LE-I32-NEXT:    mov r2, r5
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r3, [sp, #172]
-; LE-I32-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEXT:    mov r0, r8
-; LE-I32-NEXT:    mov r1, r10
-; LE-I32-NEXT:    mov r2, r7
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #144
-; LE-I32-NEXT:    mov r4, r0
-; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #132
-; LE-I32-NEXT:    ldr r7, [sp, #128]
-; LE-I32-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEXT:    mov r0, r7
-; LE-I32-NEXT:    vmov.32 d9[1], r4
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEXT:    vorr q0, q5, q5
-; LE-I32-NEXT:    vorr q1, q4, q4
-; LE-I32-NEXT:    vpop {d8, d9, d10, d11}
-; LE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; LE-I64-LABEL: lrint_v8fp128:
-; LE-I64:       @ %bb.0:
-; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEXT:    .pad #4
-; LE-I64-NEXT:    sub sp, sp, #4
-; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEXT:    .pad #8
-; LE-I64-NEXT:    sub sp, sp, #8
-; LE-I64-NEXT:    mov r11, r3
-; LE-I64-NEXT:    add r3, sp, #208
-; LE-I64-NEXT:    mov r10, r2
-; LE-I64-NEXT:    mov r4, r1
-; LE-I64-NEXT:    mov r5, r0
-; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r7, sp, #164
-; LE-I64-NEXT:    ldr r6, [sp, #160]
-; LE-I64-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; LE-I64-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEXT:    ldm r7, {r1, r2, r3, r7}
-; LE-I64-NEXT:    mov r0, r6
-; LE-I64-NEXT:    ldr r8, [sp, #128]
-; LE-I64-NEXT:    ldr r9, [sp, #144]
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #180
-; LE-I64-NEXT:    str r1, [sp] @ 4-byte Spill
-; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    mov r0, r7
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #132
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEXT:    mov r0, r8
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #148
-; LE-I64-NEXT:    mov r8, r1
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    mov r0, r9
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    mov r9, r1
-; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    mov r0, r5
-; LE-I64-NEXT:    mov r1, r4
-; LE-I64-NEXT:    mov r2, r10
-; LE-I64-NEXT:    mov r3, r11
-; LE-I64-NEXT:    ldr r6, [sp, #112]
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #116
-; LE-I64-NEXT:    mov r4, r1
-; LE-I64-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEXT:    mov r0, r6
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #196
-; LE-I64-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #192]
-; LE-I64-NEXT:    mov r5, r1
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; LE-I64-NEXT:    vmov.32 d11[1], r7
-; LE-I64-NEXT:    vmov.32 d10[1], r0
-; LE-I64-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; LE-I64-NEXT:    vmov.32 d15[1], r5
-; LE-I64-NEXT:    vorr q2, q5, q5
-; LE-I64-NEXT:    vmov.32 d13[1], r9
-; LE-I64-NEXT:    vmov.32 d9[1], r0
-; LE-I64-NEXT:    vmov.32 d14[1], r4
-; LE-I64-NEXT:    vmov.32 d12[1], r8
-; LE-I64-NEXT:    vorr q0, q7, q7
-; LE-I64-NEXT:    vmov.32 d8[1], r1
-; LE-I64-NEXT:    vorr q1, q6, q6
-; LE-I64-NEXT:    vorr q3, q4, q4
-; LE-I64-NEXT:    add sp, sp, #8
-; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEXT:    add sp, sp, #4
-; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; LE-I32-NEON-LABEL: lrint_v8fp128:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
-; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11}
-; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11}
-; LE-I32-NEON-NEXT:    mov r6, r3
-; LE-I32-NEON-NEXT:    add r3, sp, #112
-; LE-I32-NEON-NEXT:    mov r7, r2
-; LE-I32-NEON-NEXT:    mov r4, r1
-; LE-I32-NEON-NEXT:    mov r5, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEON-NEXT:    mov r0, r5
-; LE-I32-NEON-NEXT:    mov r1, r4
-; LE-I32-NEON-NEXT:    mov r2, r7
-; LE-I32-NEON-NEXT:    mov r3, r6
-; LE-I32-NEON-NEXT:    ldr r8, [sp, #160]
-; LE-I32-NEON-NEXT:    ldr r9, [sp, #64]
-; LE-I32-NEON-NEXT:    ldr r10, [sp, #80]
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #84
-; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEON-NEXT:    mov r0, r10
-; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r6, [sp, #96]
-; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #100]
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #104]
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #108]
-; LE-I32-NEON-NEXT:    mov r0, r6
-; LE-I32-NEON-NEXT:    ldr r4, [sp, #68]
-; LE-I32-NEON-NEXT:    ldr r5, [sp, #72]
-; LE-I32-NEON-NEXT:    ldr r10, [sp, #164]
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #168]
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #76]
-; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEON-NEXT:    mov r0, r9
-; LE-I32-NEON-NEXT:    mov r1, r4
-; LE-I32-NEON-NEXT:    mov r2, r5
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #172]
-; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEON-NEXT:    mov r0, r8
-; LE-I32-NEON-NEXT:    mov r1, r10
-; LE-I32-NEON-NEXT:    mov r2, r7
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #144
-; LE-I32-NEON-NEXT:    mov r4, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #132
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #128]
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEON-NEXT:    mov r0, r7
-; LE-I32-NEON-NEXT:    vmov.32 d9[1], r4
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEON-NEXT:    vorr q0, q5, q5
-; LE-I32-NEON-NEXT:    vorr q1, q4, q4
-; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11}
-; LE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v8fp128:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEON-NEXT:    .pad #4
-; LE-I64-NEON-NEXT:    sub sp, sp, #4
-; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    .pad #8
-; LE-I64-NEON-NEXT:    sub sp, sp, #8
-; LE-I64-NEON-NEXT:    mov r11, r3
-; LE-I64-NEON-NEXT:    add r3, sp, #208
-; LE-I64-NEON-NEXT:    mov r10, r2
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    mov r5, r0
-; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r7, sp, #164
-; LE-I64-NEON-NEXT:    ldr r6, [sp, #160]
-; LE-I64-NEON-NEXT:    str r1, [sp, #4] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    ldm r7, {r1, r2, r3, r7}
-; LE-I64-NEON-NEXT:    mov r0, r6
-; LE-I64-NEON-NEXT:    ldr r8, [sp, #128]
-; LE-I64-NEON-NEXT:    ldr r9, [sp, #144]
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #180
-; LE-I64-NEON-NEXT:    str r1, [sp] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r7
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #132
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r8
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #148
-; LE-I64-NEON-NEXT:    mov r8, r1
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r9
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    mov r9, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r5
-; LE-I64-NEON-NEXT:    mov r1, r4
-; LE-I64-NEON-NEXT:    mov r2, r10
-; LE-I64-NEON-NEXT:    mov r3, r11
-; LE-I64-NEON-NEXT:    ldr r6, [sp, #112]
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #116
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r6
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #196
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #192]
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r7
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r5
-; LE-I64-NEON-NEXT:    vorr q2, q5, q5
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r9
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r4
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r8
-; LE-I64-NEON-NEXT:    vorr q0, q7, q7
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-I64-NEON-NEXT:    vorr q1, q6, q6
-; LE-I64-NEON-NEXT:    vorr q3, q4, q4
-; LE-I64-NEON-NEXT:    add sp, sp, #8
-; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    add sp, sp, #4
-; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-I32-LABEL: lrint_v8fp128:
+; BE-I32-LABEL: lrint_v4fp128:
 ; BE-I32:       @ %bb.0:
-; BE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I32-NEXT:    .pad #4
-; BE-I32-NEXT:    sub sp, sp, #4
-; BE-I32-NEXT:    .vsave {d8, d9, d10, d11}
-; BE-I32-NEXT:    vpush {d8, d9, d10, d11}
-; BE-I32-NEXT:    .pad #8
-; BE-I32-NEXT:    sub sp, sp, #8
-; BE-I32-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; BE-I32-NEXT:    add r3, sp, #128
-; BE-I32-NEXT:    mov r11, r2
-; BE-I32-NEXT:    mov r6, r1
-; BE-I32-NEXT:    mov r7, r0
-; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    .save {r4, lr}
+; BE-I32-NEXT:    push {r4, lr}
+; BE-I32-NEXT:    .vsave {d8, d9}
+; BE-I32-NEXT:    vpush {d8, d9}
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #100
-; BE-I32-NEXT:    ldr r5, [sp, #96]
+; BE-I32-NEXT:    add r3, sp, #60
+; BE-I32-NEXT:    ldr r12, [sp, #56]
 ; BE-I32-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEXT:    ldr r4, [sp, #160]
 ; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEXT:    mov r0, r5
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #164
-; BE-I32-NEXT:    vmov.32 d11[0], r0
-; BE-I32-NEXT:    mov r0, r4
-; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r4, [sp, #176]
-; BE-I32-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEXT:    ldr r1, [sp, #180]
-; BE-I32-NEXT:    ldr r2, [sp, #184]
-; BE-I32-NEXT:    ldr r3, [sp, #188]
-; BE-I32-NEXT:    mov r0, r4
-; BE-I32-NEXT:    ldr r5, [sp, #116]
-; BE-I32-NEXT:    ldr r8, [sp, #120]
-; BE-I32-NEXT:    ldr r10, [sp, #84]
-; BE-I32-NEXT:    ldr r9, [sp, #88]
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEXT:    ldr r3, [sp, #124]
-; BE-I32-NEXT:    ldr r0, [sp, #112]
-; BE-I32-NEXT:    mov r1, r5
-; BE-I32-NEXT:    mov r2, r8
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEXT:    ldr r3, [sp, #92]
-; BE-I32-NEXT:    ldr r0, [sp, #80]
-; BE-I32-NEXT:    mov r1, r10
-; BE-I32-NEXT:    mov r2, r9
+; BE-I32-NEXT:    mov r0, r12
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; BE-I32-NEXT:    add r3, sp, #40
 ; BE-I32-NEXT:    mov r4, r0
-; BE-I32-NEXT:    mov r0, r7
-; BE-I32-NEXT:    mov r1, r6
-; BE-I32-NEXT:    mov r2, r11
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #148
-; BE-I32-NEXT:    ldr r7, [sp, #144]
-; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    add r3, sp, #28
+; BE-I32-NEXT:    ldr r12, [sp, #24]
+; BE-I32-NEXT:    vmov.32 d9[0], r0
 ; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEXT:    mov r0, r7
-; BE-I32-NEXT:    vmov.32 d10[1], r4
+; BE-I32-NEXT:    mov r0, r12
+; BE-I32-NEXT:    vmov.32 d9[1], r4
 ; BE-I32-NEXT:    bl lrintl
 ; BE-I32-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEXT:    vrev64.32 q0, q5
-; BE-I32-NEXT:    vrev64.32 q1, q4
-; BE-I32-NEXT:    add sp, sp, #8
-; BE-I32-NEXT:    vpop {d8, d9, d10, d11}
-; BE-I32-NEXT:    add sp, sp, #4
-; BE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; BE-I32-NEXT:    vrev64.32 q0, q4
+; BE-I32-NEXT:    vpop {d8, d9}
+; BE-I32-NEXT:    pop {r4, pc}
 ;
-; BE-I64-LABEL: lrint_v8fp128:
+; BE-I64-LABEL: lrint_v4fp128:
 ; BE-I64:       @ %bb.0:
-; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEXT:    .pad #4
-; BE-I64-NEXT:    sub sp, sp, #4
-; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
-; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
-; BE-I64-NEXT:    .pad #16
-; BE-I64-NEXT:    sub sp, sp, #16
-; BE-I64-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; BE-I64-NEXT:    add r3, sp, #208
-; BE-I64-NEXT:    mov r11, r2
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    mov r5, r0
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I64-NEXT:    .vsave {d8, d9, d10}
+; BE-I64-NEXT:    vpush {d8, d9, d10}
+; BE-I64-NEXT:    mov r5, r3
+; BE-I64-NEXT:    add r3, sp, #88
+; BE-I64-NEXT:    mov r7, r2
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    mov r4, r0
 ; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    ldr r7, [sp, #176]
-; BE-I64-NEXT:    add r3, sp, #180
-; BE-I64-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; BE-I64-NEXT:    mov r9, r1
 ; BE-I64-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    mov r0, r7
-; BE-I64-NEXT:    ldr r6, [sp, #128]
-; BE-I64-NEXT:    ldr r8, [sp, #144]
+; BE-I64-NEXT:    mov r0, r4
+; BE-I64-NEXT:    mov r1, r6
+; BE-I64-NEXT:    mov r2, r7
+; BE-I64-NEXT:    mov r3, r5
+; BE-I64-NEXT:    ldr r8, [sp, #72]
+; BE-I64-NEXT:    ldr r10, [sp, #56]
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #132
-; BE-I64-NEXT:    str r1, [sp, #8] @ 4-byte Spill
+; BE-I64-NEXT:    add r3, sp, #60
+; BE-I64-NEXT:    mov r5, r1
 ; BE-I64-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEXT:    mov r0, r6
+; BE-I64-NEXT:    mov r0, r10
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #148
-; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    add r3, sp, #76
+; BE-I64-NEXT:    mov r4, r1
 ; BE-I64-NEXT:    vmov.32 d10[0], r0
 ; BE-I64-NEXT:    mov r0, r8
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #160
-; BE-I64-NEXT:    mov r9, r0
-; BE-I64-NEXT:    mov r7, r1
-; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
-; BE-I64-NEXT:    mov r8, r1
-; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    mov r0, r5
-; BE-I64-NEXT:    mov r1, r4
-; BE-I64-NEXT:    mov r2, r11
-; BE-I64-NEXT:    ldr r10, [sp, #112]
-; BE-I64-NEXT:    vmov.32 d12[0], r9
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #116
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEXT:    mov r0, r10
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #196
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #192]
-; BE-I64-NEXT:    mov r5, r1
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
 ; BE-I64-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; BE-I64-NEXT:    vmov.32 d14[1], r5
-; BE-I64-NEXT:    vmov.32 d9[1], r0
-; BE-I64-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; BE-I64-NEXT:    vmov.32 d12[1], r7
-; BE-I64-NEXT:    vmov.32 d8[1], r0
-; BE-I64-NEXT:    vmov.32 d13[1], r4
-; BE-I64-NEXT:    vmov.32 d10[1], r6
-; BE-I64-NEXT:    vmov.32 d11[1], r8
+; BE-I64-NEXT:    vmov.32 d10[1], r4
+; BE-I64-NEXT:    vmov.32 d8[1], r9
+; BE-I64-NEXT:    vmov.32 d9[1], r5
 ; BE-I64-NEXT:    vmov.32 d16[1], r1
-; BE-I64-NEXT:    vrev64.32 d1, d14
-; BE-I64-NEXT:    vrev64.32 d3, d12
-; BE-I64-NEXT:    vrev64.32 d5, d9
-; BE-I64-NEXT:    vrev64.32 d7, d8
-; BE-I64-NEXT:    vrev64.32 d0, d13
-; BE-I64-NEXT:    vrev64.32 d2, d10
-; BE-I64-NEXT:    vrev64.32 d4, d11
-; BE-I64-NEXT:    vrev64.32 d6, d16
-; BE-I64-NEXT:    add sp, sp, #16
-; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
-; BE-I64-NEXT:    add sp, sp, #4
-; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v8fp128:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I32-NEON-NEXT:    .pad #4
-; BE-I32-NEON-NEXT:    sub sp, sp, #4
-; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11}
-; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11}
-; BE-I32-NEON-NEXT:    .pad #8
-; BE-I32-NEON-NEXT:    sub sp, sp, #8
-; BE-I32-NEON-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; BE-I32-NEON-NEXT:    add r3, sp, #128
-; BE-I32-NEON-NEXT:    mov r11, r2
-; BE-I32-NEON-NEXT:    mov r6, r1
-; BE-I32-NEON-NEXT:    mov r7, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #100
-; BE-I32-NEON-NEXT:    ldr r5, [sp, #96]
-; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEON-NEXT:    ldr r4, [sp, #160]
-; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEON-NEXT:    mov r0, r5
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #164
-; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I32-NEON-NEXT:    mov r0, r4
-; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r4, [sp, #176]
-; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #180]
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #184]
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #188]
-; BE-I32-NEON-NEXT:    mov r0, r4
-; BE-I32-NEON-NEXT:    ldr r5, [sp, #116]
-; BE-I32-NEON-NEXT:    ldr r8, [sp, #120]
-; BE-I32-NEON-NEXT:    ldr r10, [sp, #84]
-; BE-I32-NEON-NEXT:    ldr r9, [sp, #88]
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #124]
-; BE-I32-NEON-NEXT:    ldr r0, [sp, #112]
-; BE-I32-NEON-NEXT:    mov r1, r5
-; BE-I32-NEON-NEXT:    mov r2, r8
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #92]
-; BE-I32-NEON-NEXT:    ldr r0, [sp, #80]
-; BE-I32-NEON-NEXT:    mov r1, r10
-; BE-I32-NEON-NEXT:    mov r2, r9
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
-; BE-I32-NEON-NEXT:    mov r4, r0
-; BE-I32-NEON-NEXT:    mov r0, r7
-; BE-I32-NEON-NEXT:    mov r1, r6
-; BE-I32-NEON-NEXT:    mov r2, r11
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #148
-; BE-I32-NEON-NEXT:    ldr r7, [sp, #144]
-; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEON-NEXT:    mov r0, r7
-; BE-I32-NEON-NEXT:    vmov.32 d10[1], r4
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEON-NEXT:    vrev64.32 q0, q5
-; BE-I32-NEON-NEXT:    vrev64.32 q1, q4
-; BE-I32-NEON-NEXT:    add sp, sp, #8
-; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11}
-; BE-I32-NEON-NEXT:    add sp, sp, #4
-; BE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v8fp128:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEON-NEXT:    .pad #4
-; BE-I64-NEON-NEXT:    sub sp, sp, #4
-; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
-; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
-; BE-I64-NEON-NEXT:    .pad #16
-; BE-I64-NEON-NEXT:    sub sp, sp, #16
-; BE-I64-NEON-NEXT:    str r3, [sp, #4] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    add r3, sp, #208
-; BE-I64-NEON-NEXT:    mov r11, r2
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    mov r5, r0
-; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    ldr r7, [sp, #176]
-; BE-I64-NEON-NEXT:    add r3, sp, #180
-; BE-I64-NEON-NEXT:    str r1, [sp, #12] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    mov r0, r7
-; BE-I64-NEON-NEXT:    ldr r6, [sp, #128]
-; BE-I64-NEON-NEXT:    ldr r8, [sp, #144]
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #132
-; BE-I64-NEON-NEXT:    str r1, [sp, #8] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r6
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #148
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r8
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #160
-; BE-I64-NEON-NEXT:    mov r9, r0
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    mov r8, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r5
-; BE-I64-NEON-NEXT:    mov r1, r4
-; BE-I64-NEON-NEXT:    mov r2, r11
-; BE-I64-NEON-NEXT:    ldr r10, [sp, #112]
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r9
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #116
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r10
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #196
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #192]
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r5
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r7
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r4
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r6
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r8
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 d1, d14
-; BE-I64-NEON-NEXT:    vrev64.32 d3, d12
-; BE-I64-NEON-NEXT:    vrev64.32 d5, d9
-; BE-I64-NEON-NEXT:    vrev64.32 d7, d8
-; BE-I64-NEON-NEXT:    vrev64.32 d0, d13
-; BE-I64-NEON-NEXT:    vrev64.32 d2, d10
-; BE-I64-NEON-NEXT:    vrev64.32 d4, d11
-; BE-I64-NEON-NEXT:    vrev64.32 d6, d16
-; BE-I64-NEON-NEXT:    add sp, sp, #16
-; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
-; BE-I64-NEON-NEXT:    add sp, sp, #4
-; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x)
-  ret <8 x iXLen> %a
+; BE-I64-NEXT:    vrev64.32 d1, d10
+; BE-I64-NEXT:    vrev64.32 d3, d8
+; BE-I64-NEXT:    vrev64.32 d0, d9
+; BE-I64-NEXT:    vrev64.32 d2, d16
+; BE-I64-NEXT:    vpop {d8, d9, d10}
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128> %x)
+  ret <4 x iXLen> %a
 }
-declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>)
+declare <4 x iXLen> @llvm.lrint.v4iXLen.v4fp128(<4 x fp128>)
 
-define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) {
-; LE-I32-LABEL: lrint_v16fp128:
+define <8 x iXLen> @lrint_v8fp128(<8 x fp128> %x) {
+; LE-I32-LABEL: lrint_v8fp128:
 ; LE-I32:       @ %bb.0:
-; LE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I32-NEXT:    .pad #4
-; LE-I32-NEXT:    sub sp, sp, #4
-; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEXT:    mov r8, r3
-; LE-I32-NEXT:    add r3, sp, #280
-; LE-I32-NEXT:    mov r9, r2
-; LE-I32-NEXT:    mov r10, r1
-; LE-I32-NEXT:    mov r6, r0
-; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r4, [sp, #216]
-; LE-I32-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEXT:    ldr r1, [sp, #220]
-; LE-I32-NEXT:    ldr r2, [sp, #224]
-; LE-I32-NEXT:    ldr r3, [sp, #228]
-; LE-I32-NEXT:    mov r0, r4
-; LE-I32-NEXT:    ldr r7, [sp, #152]
-; LE-I32-NEXT:    ldr r11, [sp, #104]
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #156
-; LE-I32-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEXT:    mov r0, r7
-; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r7, [sp, #184]
-; LE-I32-NEXT:    vmov.32 d12[0], r0
-; LE-I32-NEXT:    ldr r1, [sp, #188]
-; LE-I32-NEXT:    ldr r2, [sp, #192]
-; LE-I32-NEXT:    ldr r3, [sp, #196]
-; LE-I32-NEXT:    mov r0, r7
-; LE-I32-NEXT:    ldr r4, [sp, #120]
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #124
-; LE-I32-NEXT:    vmov.32 d13[0], r0
-; LE-I32-NEXT:    mov r0, r4
-; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r5, [sp, #136]
-; LE-I32-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEXT:    ldr r1, [sp, #140]
-; LE-I32-NEXT:    ldr r2, [sp, #144]
-; LE-I32-NEXT:    ldr r3, [sp, #148]
-; LE-I32-NEXT:    mov r0, r5
-; LE-I32-NEXT:    ldr r4, [sp, #108]
-; LE-I32-NEXT:    ldr r7, [sp, #112]
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r3, [sp, #116]
-; LE-I32-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEXT:    mov r0, r11
+; LE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11}
+; LE-I32-NEXT:    mov r6, r3
+; LE-I32-NEXT:    add r3, sp, #112
+; LE-I32-NEXT:    mov r7, r2
+; LE-I32-NEXT:    mov r4, r1
+; LE-I32-NEXT:    mov r5, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    mov r0, r5
 ; LE-I32-NEXT:    mov r1, r4
 ; LE-I32-NEXT:    mov r2, r7
+; LE-I32-NEXT:    mov r3, r6
+; LE-I32-NEXT:    ldr r8, [sp, #160]
+; LE-I32-NEXT:    ldr r9, [sp, #64]
+; LE-I32-NEXT:    ldr r10, [sp, #80]
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    mov r4, r0
-; LE-I32-NEXT:    mov r0, r6
-; LE-I32-NEXT:    mov r1, r10
-; LE-I32-NEXT:    mov r2, r9
-; LE-I32-NEXT:    mov r3, r8
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r7, [sp, #200]
-; LE-I32-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEXT:    ldr r1, [sp, #204]
-; LE-I32-NEXT:    ldr r2, [sp, #208]
-; LE-I32-NEXT:    ldr r3, [sp, #212]
-; LE-I32-NEXT:    mov r0, r7
-; LE-I32-NEXT:    ldr r5, [sp, #172]
-; LE-I32-NEXT:    vmov.32 d14[1], r4
-; LE-I32-NEXT:    ldr r6, [sp, #176]
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    vmov.32 d13[1], r0
-; LE-I32-NEXT:    ldr r3, [sp, #180]
-; LE-I32-NEXT:    ldr r0, [sp, #168]
-; LE-I32-NEXT:    mov r1, r5
-; LE-I32-NEXT:    mov r2, r6
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #248
-; LE-I32-NEXT:    mov r5, r0
-; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    add r3, sp, #84
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    mov r0, r10
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r4, [sp, #264]
+; LE-I32-NEXT:    ldr r6, [sp, #96]
 ; LE-I32-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEXT:    ldr r1, [sp, #268]
-; LE-I32-NEXT:    ldr r2, [sp, #272]
-; LE-I32-NEXT:    vmov.32 d12[1], r5
-; LE-I32-NEXT:    ldr r3, [sp, #276]
-; LE-I32-NEXT:    mov r0, r4
-; LE-I32-NEXT:    ldr r6, [sp, #236]
-; LE-I32-NEXT:    ldr r7, [sp, #240]
-; LE-I32-NEXT:    ldr r8, [sp, #332]
-; LE-I32-NEXT:    ldr r5, [sp, #336]
+; LE-I32-NEXT:    ldr r1, [sp, #100]
+; LE-I32-NEXT:    ldr r2, [sp, #104]
+; LE-I32-NEXT:    ldr r3, [sp, #108]
+; LE-I32-NEXT:    mov r0, r6
+; LE-I32-NEXT:    ldr r4, [sp, #68]
+; LE-I32-NEXT:    ldr r5, [sp, #72]
+; LE-I32-NEXT:    ldr r10, [sp, #164]
+; LE-I32-NEXT:    ldr r7, [sp, #168]
 ; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r3, [sp, #76]
 ; LE-I32-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEXT:    ldr r3, [sp, #244]
-; LE-I32-NEXT:    ldr r0, [sp, #232]
-; LE-I32-NEXT:    mov r1, r6
-; LE-I32-NEXT:    mov r2, r7
+; LE-I32-NEXT:    mov r0, r9
+; LE-I32-NEXT:    mov r1, r4
+; LE-I32-NEXT:    mov r2, r5
 ; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r3, [sp, #172]
 ; LE-I32-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEXT:    ldr r3, [sp, #340]
-; LE-I32-NEXT:    ldr r0, [sp, #328]
-; LE-I32-NEXT:    mov r1, r8
-; LE-I32-NEXT:    mov r2, r5
+; LE-I32-NEXT:    mov r0, r8
+; LE-I32-NEXT:    mov r1, r10
+; LE-I32-NEXT:    mov r2, r7
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #312
+; LE-I32-NEXT:    add r3, sp, #144
 ; LE-I32-NEXT:    mov r4, r0
 ; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #300
-; LE-I32-NEXT:    ldr r7, [sp, #296]
+; LE-I32-NEXT:    add r3, sp, #132
+; LE-I32-NEXT:    ldr r7, [sp, #128]
 ; LE-I32-NEXT:    vmov.32 d9[0], r0
 ; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I32-NEXT:    mov r0, r7
 ; LE-I32-NEXT:    vmov.32 d9[1], r4
 ; LE-I32-NEXT:    bl lrintl
 ; LE-I32-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEXT:    vorr q0, q7, q7
-; LE-I32-NEXT:    vorr q1, q6, q6
-; LE-I32-NEXT:    vorr q2, q5, q5
-; LE-I32-NEXT:    vorr q3, q4, q4
-; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEXT:    add sp, sp, #4
-; LE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+; LE-I32-NEXT:    vorr q0, q5, q5
+; LE-I32-NEXT:    vorr q1, q4, q4
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11}
+; LE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
 ;
-; LE-I64-LABEL: lrint_v16fp128:
+; LE-I64-LABEL: lrint_v8fp128:
 ; LE-I64:       @ %bb.0:
 ; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -9688,1063 +2425,249 @@ define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) {
 ; LE-I64-NEXT:    sub sp, sp, #4
 ; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEXT:    .pad #72
-; LE-I64-NEXT:    sub sp, sp, #72
-; LE-I64-NEXT:    mov r6, r3
-; LE-I64-NEXT:    add r3, sp, #408
-; LE-I64-NEXT:    mov r7, r2
-; LE-I64-NEXT:    mov r4, r0
+; LE-I64-NEXT:    .pad #8
+; LE-I64-NEXT:    sub sp, sp, #8
+; LE-I64-NEXT:    mov r11, r3
+; LE-I64-NEXT:    add r3, sp, #208
+; LE-I64-NEXT:    mov r10, r2
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    mov r5, r0
 ; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r5, sp, #176
-; LE-I64-NEXT:    mov r10, r1
-; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    mov r0, r7
-; LE-I64-NEXT:    ldm r5, {r2, r3, r5}
-; LE-I64-NEXT:    mov r1, r6
-; LE-I64-NEXT:    ldr r8, [sp, #232]
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #188
-; LE-I64-NEXT:    mov r9, r1
-; LE-I64-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEXT:    mov r0, r5
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #236
-; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    add r7, sp, #164
+; LE-I64-NEXT:    ldr r6, [sp, #160]
+; LE-I64-NEXT:    str r1, [sp, #4] @ 4-byte Spill
 ; LE-I64-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEXT:    mov r0, r8
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #252
-; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #248]
-; LE-I64-NEXT:    mov r8, r1
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #268
-; LE-I64-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #264]
-; LE-I64-NEXT:    mov r6, r1
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #284
-; LE-I64-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #280]
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #316
-; LE-I64-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #312]
-; LE-I64-NEXT:    mov r5, r1
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    vmov.32 d15[1], r5
-; LE-I64-NEXT:    add lr, sp, #56
-; LE-I64-NEXT:    ldr r5, [sp, #300]
-; LE-I64-NEXT:    vmov.32 d14[1], r7
-; LE-I64-NEXT:    ldr r2, [sp, #304]
-; LE-I64-NEXT:    ldr r3, [sp, #308]
-; LE-I64-NEXT:    vmov.32 d11[1], r6
-; LE-I64-NEXT:    ldr r6, [sp, #200]
-; LE-I64-NEXT:    ldr r7, [sp, #204]
-; LE-I64-NEXT:    vmov.32 d10[1], r8
-; LE-I64-NEXT:    ldr r8, [sp, #344]
-; LE-I64-NEXT:    vmov.32 d9[1], r11
-; LE-I64-NEXT:    ldr r11, [sp, #216]
-; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #40
-; LE-I64-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #296]
-; LE-I64-NEXT:    vmov.32 d8[1], r9
-; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #24
-; LE-I64-NEXT:    vorr q5, q8, q8
-; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEXT:    vorr q4, q6, q6
-; LE-I64-NEXT:    vmov.32 d11[1], r1
-; LE-I64-NEXT:    mov r1, r5
-; LE-I64-NEXT:    vmov.32 d9[1], r10
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    ldr r2, [sp, #208]
-; LE-I64-NEXT:    ldr r3, [sp, #212]
-; LE-I64-NEXT:    add lr, sp, #8
-; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    ldm r7, {r1, r2, r3, r7}
 ; LE-I64-NEXT:    mov r0, r6
-; LE-I64-NEXT:    mov r1, r7
-; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    ldr r8, [sp, #128]
+; LE-I64-NEXT:    ldr r9, [sp, #144]
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #220
-; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    add r3, sp, #180
+; LE-I64-NEXT:    str r1, [sp] @ 4-byte Spill
 ; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    mov r0, r11
+; LE-I64-NEXT:    mov r0, r7
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #348
-; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    add r3, sp, #132
+; LE-I64-NEXT:    mov r7, r1
 ; LE-I64-NEXT:    vmov.32 d11[0], r0
 ; LE-I64-NEXT:    mov r0, r8
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #364
-; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #360]
+; LE-I64-NEXT:    add r3, sp, #148
 ; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    mov r0, r9
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #380
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    mov r0, r5
+; LE-I64-NEXT:    mov r1, r4
+; LE-I64-NEXT:    mov r2, r10
+; LE-I64-NEXT:    mov r3, r11
+; LE-I64-NEXT:    ldr r6, [sp, #112]
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    add r3, sp, #116
+; LE-I64-NEXT:    mov r4, r1
 ; LE-I64-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #376]
-; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    mov r0, r6
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #396
+; LE-I64-NEXT:    add r3, sp, #196
 ; LE-I64-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #392]
-; LE-I64-NEXT:    mov r6, r1
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #332
-; LE-I64-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #328]
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add lr, sp, #8
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    add r0, r4, #64
-; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #24
-; LE-I64-NEXT:    vmov.32 d13[1], r8
-; LE-I64-NEXT:    vmov.32 d18[1], r9
-; LE-I64-NEXT:    vmov.32 d15[1], r6
-; LE-I64-NEXT:    vmov.32 d12[1], r1
-; LE-I64-NEXT:    vmov.32 d14[1], r5
-; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-I64-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-I64-NEXT:    vmov.32 d8[1], r7
-; LE-I64-NEXT:    vst1.64 {d14, d15}, [r0:128]!
-; LE-I64-NEXT:    vst1.64 {d8, d9}, [r0:128]
-; LE-I64-NEXT:    vmov.32 d11[1], r11
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #40
-; LE-I64-NEXT:    vmov.32 d10[1], r10
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r4:128]!
-; LE-I64-NEXT:    vst1.64 {d10, d11}, [r4:128]!
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #56
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r4:128]!
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r4:128]
-; LE-I64-NEXT:    add sp, sp, #72
+; LE-I64-NEXT:    ldr r0, [sp, #192]
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    bl lrintl
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d11[1], r7
+; LE-I64-NEXT:    vmov.32 d10[1], r0
+; LE-I64-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d15[1], r5
+; LE-I64-NEXT:    vorr q2, q5, q5
+; LE-I64-NEXT:    vmov.32 d13[1], r9
+; LE-I64-NEXT:    vmov.32 d9[1], r0
+; LE-I64-NEXT:    vmov.32 d14[1], r4
+; LE-I64-NEXT:    vmov.32 d12[1], r8
+; LE-I64-NEXT:    vorr q0, q7, q7
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vorr q1, q6, q6
+; LE-I64-NEXT:    vorr q3, q4, q4
+; LE-I64-NEXT:    add sp, sp, #8
 ; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-I64-NEXT:    add sp, sp, #4
 ; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
-; LE-I32-NEON-LABEL: lrint_v16fp128:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I32-NEON-NEXT:    .pad #4
-; LE-I32-NEON-NEXT:    sub sp, sp, #4
-; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    mov r8, r3
-; LE-I32-NEON-NEXT:    add r3, sp, #280
-; LE-I32-NEON-NEXT:    mov r9, r2
-; LE-I32-NEON-NEXT:    mov r10, r1
-; LE-I32-NEON-NEXT:    mov r6, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r4, [sp, #216]
-; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #220]
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #224]
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #228]
-; LE-I32-NEON-NEXT:    mov r0, r4
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #152]
-; LE-I32-NEON-NEXT:    ldr r11, [sp, #104]
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #156
-; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEON-NEXT:    mov r0, r7
-; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #184]
-; LE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #188]
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #192]
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #196]
-; LE-I32-NEON-NEXT:    mov r0, r7
-; LE-I32-NEON-NEXT:    ldr r4, [sp, #120]
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #124
-; LE-I32-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I32-NEON-NEXT:    mov r0, r4
-; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r5, [sp, #136]
-; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #140]
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #144]
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #148]
-; LE-I32-NEON-NEXT:    mov r0, r5
-; LE-I32-NEON-NEXT:    ldr r4, [sp, #108]
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #112]
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #116]
-; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEON-NEXT:    mov r0, r11
-; LE-I32-NEON-NEXT:    mov r1, r4
-; LE-I32-NEON-NEXT:    mov r2, r7
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    mov r4, r0
-; LE-I32-NEON-NEXT:    mov r0, r6
-; LE-I32-NEON-NEXT:    mov r1, r10
-; LE-I32-NEON-NEXT:    mov r2, r9
-; LE-I32-NEON-NEXT:    mov r3, r8
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #200]
-; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #204]
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #208]
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #212]
-; LE-I32-NEON-NEXT:    mov r0, r7
-; LE-I32-NEON-NEXT:    ldr r5, [sp, #172]
-; LE-I32-NEON-NEXT:    vmov.32 d14[1], r4
-; LE-I32-NEON-NEXT:    ldr r6, [sp, #176]
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #180]
-; LE-I32-NEON-NEXT:    ldr r0, [sp, #168]
-; LE-I32-NEON-NEXT:    mov r1, r5
-; LE-I32-NEON-NEXT:    mov r2, r6
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #248
-; LE-I32-NEON-NEXT:    mov r5, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r4, [sp, #264]
-; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #268]
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #272]
-; LE-I32-NEON-NEXT:    vmov.32 d12[1], r5
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #276]
-; LE-I32-NEON-NEXT:    mov r0, r4
-; LE-I32-NEON-NEXT:    ldr r6, [sp, #236]
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #240]
-; LE-I32-NEON-NEXT:    ldr r8, [sp, #332]
-; LE-I32-NEON-NEXT:    ldr r5, [sp, #336]
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #244]
-; LE-I32-NEON-NEXT:    ldr r0, [sp, #232]
-; LE-I32-NEON-NEXT:    mov r1, r6
-; LE-I32-NEON-NEXT:    mov r2, r7
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #340]
-; LE-I32-NEON-NEXT:    ldr r0, [sp, #328]
-; LE-I32-NEON-NEXT:    mov r1, r8
-; LE-I32-NEON-NEXT:    mov r2, r5
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #312
-; LE-I32-NEON-NEXT:    mov r4, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #300
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #296]
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEON-NEXT:    mov r0, r7
-; LE-I32-NEON-NEXT:    vmov.32 d9[1], r4
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; LE-I32-NEON-NEXT:    vorr q0, q7, q7
-; LE-I32-NEON-NEXT:    vorr q1, q6, q6
-; LE-I32-NEON-NEXT:    vorr q2, q5, q5
-; LE-I32-NEON-NEXT:    vorr q3, q4, q4
-; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    add sp, sp, #4
-; LE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v16fp128:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEON-NEXT:    .pad #4
-; LE-I64-NEON-NEXT:    sub sp, sp, #4
-; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    .pad #72
-; LE-I64-NEON-NEXT:    sub sp, sp, #72
-; LE-I64-NEON-NEXT:    mov r6, r3
-; LE-I64-NEON-NEXT:    add r3, sp, #408
-; LE-I64-NEON-NEXT:    mov r7, r2
-; LE-I64-NEON-NEXT:    mov r4, r0
-; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r5, sp, #176
-; LE-I64-NEON-NEXT:    mov r10, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r7
-; LE-I64-NEON-NEXT:    ldm r5, {r2, r3, r5}
-; LE-I64-NEON-NEXT:    mov r1, r6
-; LE-I64-NEON-NEXT:    ldr r8, [sp, #232]
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #188
-; LE-I64-NEON-NEXT:    mov r9, r1
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r5
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #236
-; LE-I64-NEON-NEXT:    mov r11, r1
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r8
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #252
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #248]
-; LE-I64-NEON-NEXT:    mov r8, r1
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #268
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #264]
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #284
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #280]
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #316
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #312]
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r5
-; LE-I64-NEON-NEXT:    add lr, sp, #56
-; LE-I64-NEON-NEXT:    ldr r5, [sp, #300]
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r7
-; LE-I64-NEON-NEXT:    ldr r2, [sp, #304]
-; LE-I64-NEON-NEXT:    ldr r3, [sp, #308]
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r6
-; LE-I64-NEON-NEXT:    ldr r6, [sp, #200]
-; LE-I64-NEON-NEXT:    ldr r7, [sp, #204]
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r8
-; LE-I64-NEON-NEXT:    ldr r8, [sp, #344]
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r11
-; LE-I64-NEON-NEXT:    ldr r11, [sp, #216]
-; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #40
-; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #296]
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r9
-; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #24
-; LE-I64-NEON-NEXT:    vorr q5, q8, q8
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    vorr q4, q6, q6
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r1
-; LE-I64-NEON-NEXT:    mov r1, r5
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r10
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    ldr r2, [sp, #208]
-; LE-I64-NEON-NEXT:    ldr r3, [sp, #212]
-; LE-I64-NEON-NEXT:    add lr, sp, #8
-; LE-I64-NEON-NEXT:    mov r9, r1
-; LE-I64-NEON-NEXT:    mov r0, r6
-; LE-I64-NEON-NEXT:    mov r1, r7
-; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #220
-; LE-I64-NEON-NEXT:    mov r10, r1
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r11
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #348
-; LE-I64-NEON-NEXT:    mov r11, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r8
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #364
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #360]
-; LE-I64-NEON-NEXT:    mov r8, r1
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #380
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #376]
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #396
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #392]
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #332
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #328]
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add lr, sp, #8
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    add r0, r4, #64
-; LE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #24
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r8
-; LE-I64-NEON-NEXT:    vmov.32 d18[1], r9
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r6
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r1
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r5
-; LE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r7
-; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r0:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r11
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #40
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r10
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d10, d11}, [r4:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #56
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]
-; LE-I64-NEON-NEXT:    add sp, sp, #72
-; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    add sp, sp, #4
-; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-I32-LABEL: lrint_v16fp128:
+; BE-I32-LABEL: lrint_v8fp128:
 ; BE-I32:       @ %bb.0:
 ; BE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; BE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; BE-I32-NEXT:    .pad #4
 ; BE-I32-NEXT:    sub sp, sp, #4
-; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEXT:    .pad #16
-; BE-I32-NEXT:    sub sp, sp, #16
-; BE-I32-NEXT:    stm sp, {r0, r1, r2, r3} @ 16-byte Folded Spill
-; BE-I32-NEXT:    add r3, sp, #264
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11}
+; BE-I32-NEXT:    .pad #8
+; BE-I32-NEXT:    sub sp, sp, #8
+; BE-I32-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; BE-I32-NEXT:    add r3, sp, #128
+; BE-I32-NEXT:    mov r11, r2
+; BE-I32-NEXT:    mov r6, r1
+; BE-I32-NEXT:    mov r7, r0
 ; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #332
-; BE-I32-NEXT:    ldr r7, [sp, #328]
-; BE-I32-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEXT:    ldr r10, [sp, #280]
+; BE-I32-NEXT:    add r3, sp, #100
+; BE-I32-NEXT:    ldr r5, [sp, #96]
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    ldr r4, [sp, #160]
 ; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEXT:    mov r0, r7
-; BE-I32-NEXT:    ldr r8, [sp, #168]
+; BE-I32-NEXT:    mov r0, r5
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r5, [sp, #344]
+; BE-I32-NEXT:    add r3, sp, #164
 ; BE-I32-NEXT:    vmov.32 d11[0], r0
-; BE-I32-NEXT:    ldr r1, [sp, #348]
-; BE-I32-NEXT:    ldr r2, [sp, #352]
-; BE-I32-NEXT:    ldr r3, [sp, #356]
-; BE-I32-NEXT:    mov r0, r5
-; BE-I32-NEXT:    ldr r7, [sp, #284]
-; BE-I32-NEXT:    ldr r4, [sp, #288]
-; BE-I32-NEXT:    ldr r6, [sp, #172]
-; BE-I32-NEXT:    ldr r9, [sp, #176]
+; BE-I32-NEXT:    mov r0, r4
+; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r3, [sp, #292]
-; BE-I32-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEXT:    mov r0, r10
-; BE-I32-NEXT:    mov r1, r7
-; BE-I32-NEXT:    mov r2, r4
+; BE-I32-NEXT:    ldr r4, [sp, #176]
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    ldr r1, [sp, #180]
+; BE-I32-NEXT:    ldr r2, [sp, #184]
+; BE-I32-NEXT:    ldr r3, [sp, #188]
+; BE-I32-NEXT:    mov r0, r4
+; BE-I32-NEXT:    ldr r5, [sp, #116]
+; BE-I32-NEXT:    ldr r8, [sp, #120]
+; BE-I32-NEXT:    ldr r10, [sp, #84]
+; BE-I32-NEXT:    ldr r9, [sp, #88]
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r3, [sp, #180]
 ; BE-I32-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEXT:    mov r0, r8
-; BE-I32-NEXT:    mov r1, r6
+; BE-I32-NEXT:    ldr r3, [sp, #124]
+; BE-I32-NEXT:    ldr r0, [sp, #112]
+; BE-I32-NEXT:    mov r1, r5
+; BE-I32-NEXT:    mov r2, r8
+; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    ldr r3, [sp, #92]
+; BE-I32-NEXT:    ldr r0, [sp, #80]
+; BE-I32-NEXT:    mov r1, r10
 ; BE-I32-NEXT:    mov r2, r9
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #232
+; BE-I32-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
 ; BE-I32-NEXT:    mov r4, r0
-; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #136
-; BE-I32-NEXT:    mov r6, r0
-; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r5, [sp, #296]
-; BE-I32-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEXT:    ldr r1, [sp, #300]
-; BE-I32-NEXT:    ldr r2, [sp, #304]
-; BE-I32-NEXT:    ldr r3, [sp, #308]
-; BE-I32-NEXT:    mov r0, r5
-; BE-I32-NEXT:    ldr r10, [sp, #216]
-; BE-I32-NEXT:    ldr r8, [sp, #220]
-; BE-I32-NEXT:    ldr r9, [sp, #152]
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r7, [sp, #248]
-; BE-I32-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEXT:    ldr r1, [sp, #252]
-; BE-I32-NEXT:    ldr r2, [sp, #256]
-; BE-I32-NEXT:    vmov.32 d8[0], r6
-; BE-I32-NEXT:    ldr r3, [sp, #260]
 ; BE-I32-NEXT:    mov r0, r7
-; BE-I32-NEXT:    ldr r5, [sp, #224]
-; BE-I32-NEXT:    ldr r11, [sp, #120]
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r3, [sp, #228]
-; BE-I32-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEXT:    mov r0, r10
-; BE-I32-NEXT:    mov r1, r8
-; BE-I32-NEXT:    mov r2, r5
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #200
-; BE-I32-NEXT:    mov r5, r0
-; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    vmov.32 d15[0], r0
-; BE-I32-NEXT:    ldr r0, [sp, #184]
-; BE-I32-NEXT:    ldr r1, [sp, #188]
-; BE-I32-NEXT:    ldr r2, [sp, #192]
-; BE-I32-NEXT:    vmov.32 d14[0], r4
-; BE-I32-NEXT:    ldr r3, [sp, #196]
-; BE-I32-NEXT:    vmov.32 d15[1], r5
-; BE-I32-NEXT:    ldr r7, [sp, #156]
-; BE-I32-NEXT:    ldr r6, [sp, #160]
-; BE-I32-NEXT:    ldr r4, [sp, #124]
-; BE-I32-NEXT:    ldr r5, [sp, #128]
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r3, [sp, #164]
-; BE-I32-NEXT:    vmov.32 d14[1], r0
-; BE-I32-NEXT:    mov r0, r9
-; BE-I32-NEXT:    mov r1, r7
-; BE-I32-NEXT:    mov r2, r6
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r3, [sp, #132]
-; BE-I32-NEXT:    vmov.32 d13[1], r0
-; BE-I32-NEXT:    mov r0, r11
-; BE-I32-NEXT:    mov r1, r4
-; BE-I32-NEXT:    mov r2, r5
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    mov r4, r0
-; BE-I32-NEXT:    ldm sp, {r0, r1, r2, r3} @ 16-byte Folded Reload
+; BE-I32-NEXT:    mov r1, r6
+; BE-I32-NEXT:    mov r2, r11
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #316
-; BE-I32-NEXT:    ldr r7, [sp, #312]
-; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    add r3, sp, #148
+; BE-I32-NEXT:    ldr r7, [sp, #144]
+; BE-I32-NEXT:    vmov.32 d10[0], r0
 ; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I32-NEXT:    mov r0, r7
-; BE-I32-NEXT:    vmov.32 d12[1], r4
+; BE-I32-NEXT:    vmov.32 d10[1], r4
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEXT:    vrev64.32 q0, q6
-; BE-I32-NEXT:    vrev64.32 q1, q7
-; BE-I32-NEXT:    vrev64.32 q2, q4
-; BE-I32-NEXT:    vrev64.32 q3, q5
-; BE-I32-NEXT:    add sp, sp, #16
-; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q5
+; BE-I32-NEXT:    vrev64.32 q1, q4
+; BE-I32-NEXT:    add sp, sp, #8
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11}
 ; BE-I32-NEXT:    add sp, sp, #4
 ; BE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
-; BE-I64-LABEL: lrint_v16fp128:
+; BE-I64-LABEL: lrint_v8fp128:
 ; BE-I64:       @ %bb.0:
 ; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; BE-I64-NEXT:    .pad #4
 ; BE-I64-NEXT:    sub sp, sp, #4
-; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEXT:    .pad #56
-; BE-I64-NEXT:    sub sp, sp, #56
-; BE-I64-NEXT:    mov r5, r3
-; BE-I64-NEXT:    add r3, sp, #376
-; BE-I64-NEXT:    mov r6, r2
-; BE-I64-NEXT:    mov r4, r0
-; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    ldr r7, [sp, #392]
-; BE-I64-NEXT:    add r3, sp, #396
-; BE-I64-NEXT:    mov r9, r1
-; BE-I64-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    mov r0, r7
-; BE-I64-NEXT:    ldr r11, [sp, #168]
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    ldr r2, [sp, #160]
-; BE-I64-NEXT:    mov r10, r1
-; BE-I64-NEXT:    ldr r3, [sp, #164]
-; BE-I64-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEXT:    mov r0, r6
-; BE-I64-NEXT:    mov r1, r5
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #172
-; BE-I64-NEXT:    mov r8, r1
-; BE-I64-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEXT:    mov r0, r11
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #220
-; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #216]
-; BE-I64-NEXT:    mov r11, r1
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #236
-; BE-I64-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #232]
-; BE-I64-NEXT:    mov r6, r1
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #252
-; BE-I64-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #248]
-; BE-I64-NEXT:    mov r7, r1
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #268
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #264]
-; BE-I64-NEXT:    mov r5, r1
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #280]
-; BE-I64-NEXT:    ldr r2, [sp, #288]
-; BE-I64-NEXT:    vmov.32 d13[1], r7
-; BE-I64-NEXT:    ldr r7, [sp, #284]
-; BE-I64-NEXT:    ldr r3, [sp, #292]
-; BE-I64-NEXT:    vmov.32 d14[1], r5
-; BE-I64-NEXT:    ldr r5, [sp, #328]
-; BE-I64-NEXT:    vmov.32 d12[1], r6
-; BE-I64-NEXT:    ldr r6, [sp, #300]
-; BE-I64-NEXT:    vmov.32 d10[1], r8
-; BE-I64-NEXT:    ldr r8, [sp, #184]
-; BE-I64-NEXT:    vmov.32 d11[1], r11
-; BE-I64-NEXT:    vmov.32 d9[1], r10
-; BE-I64-NEXT:    vmov.32 d8[1], r9
-; BE-I64-NEXT:    vmov.32 d15[1], r1
-; BE-I64-NEXT:    mov r1, r7
-; BE-I64-NEXT:    vstr d14, [sp, #48] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d13, [sp, #40] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d12, [sp, #32] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d10, [sp, #16] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d9, [sp, #8] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d8, [sp] @ 8-byte Spill
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT:    .pad #16
+; BE-I64-NEXT:    sub sp, sp, #16
+; BE-I64-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; BE-I64-NEXT:    add r3, sp, #208
+; BE-I64-NEXT:    mov r11, r2
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    mov r5, r0
+; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    mov r10, r1
-; BE-I64-NEXT:    ldr r1, [sp, #296]
-; BE-I64-NEXT:    ldr r2, [sp, #304]
+; BE-I64-NEXT:    ldr r7, [sp, #176]
+; BE-I64-NEXT:    add r3, sp, #180
+; BE-I64-NEXT:    str r1, [sp, #12] @ 4-byte Spill
 ; BE-I64-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEXT:    ldr r3, [sp, #308]
-; BE-I64-NEXT:    mov r0, r1
-; BE-I64-NEXT:    mov r1, r6
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    mov r0, r7
+; BE-I64-NEXT:    ldr r6, [sp, #128]
+; BE-I64-NEXT:    ldr r8, [sp, #144]
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #332
-; BE-I64-NEXT:    mov r11, r1
+; BE-I64-NEXT:    add r3, sp, #132
+; BE-I64-NEXT:    str r1, [sp, #8] @ 4-byte Spill
 ; BE-I64-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEXT:    mov r0, r5
+; BE-I64-NEXT:    mov r0, r6
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #188
-; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    add r3, sp, #148
+; BE-I64-NEXT:    mov r6, r1
 ; BE-I64-NEXT:    vmov.32 d10[0], r0
 ; BE-I64-NEXT:    mov r0, r8
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #204
-; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #200]
-; BE-I64-NEXT:    mov r8, r1
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    add r3, sp, #160
+; BE-I64-NEXT:    mov r9, r0
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #348
-; BE-I64-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #344]
-; BE-I64-NEXT:    mov r5, r1
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    ldr r3, [sp, #4] @ 4-byte Reload
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    mov r0, r5
+; BE-I64-NEXT:    mov r1, r4
+; BE-I64-NEXT:    mov r2, r11
+; BE-I64-NEXT:    ldr r10, [sp, #112]
+; BE-I64-NEXT:    vmov.32 d12[0], r9
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #364
+; BE-I64-NEXT:    add r3, sp, #116
+; BE-I64-NEXT:    mov r4, r1
 ; BE-I64-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #360]
-; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    mov r0, r10
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #316
+; BE-I64-NEXT:    add r3, sp, #196
 ; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #312]
-; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    ldr r0, [sp, #192]
+; BE-I64-NEXT:    mov r5, r1
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    vldr d18, [sp, #48] @ 8-byte Reload
-; BE-I64-NEXT:    vrev64.32 d17, d15
-; BE-I64-NEXT:    vrev64.32 d16, d18
-; BE-I64-NEXT:    vldr d18, [sp, #40] @ 8-byte Reload
-; BE-I64-NEXT:    vmov.32 d24[0], r0
-; BE-I64-NEXT:    add r0, r4, #64
-; BE-I64-NEXT:    vldr d20, [sp, #32] @ 8-byte Reload
-; BE-I64-NEXT:    vrev64.32 d19, d18
-; BE-I64-NEXT:    vmov.32 d9[1], r11
-; BE-I64-NEXT:    vmov.32 d10[1], r7
-; BE-I64-NEXT:    vrev64.32 d18, d20
-; BE-I64-NEXT:    vldr d20, [sp, #24] @ 8-byte Reload
-; BE-I64-NEXT:    vmov.32 d8[1], r10
-; BE-I64-NEXT:    vmov.32 d14[1], r6
-; BE-I64-NEXT:    vmov.32 d24[1], r1
-; BE-I64-NEXT:    vldr d22, [sp, #16] @ 8-byte Reload
-; BE-I64-NEXT:    vrev64.32 d21, d20
-; BE-I64-NEXT:    vrev64.32 d1, d9
-; BE-I64-NEXT:    vmov.32 d13[1], r9
-; BE-I64-NEXT:    vrev64.32 d31, d10
-; BE-I64-NEXT:    vrev64.32 d20, d22
-; BE-I64-NEXT:    vldr d22, [sp, #8] @ 8-byte Reload
-; BE-I64-NEXT:    vrev64.32 d0, d8
-; BE-I64-NEXT:    vrev64.32 d29, d14
-; BE-I64-NEXT:    vmov.32 d12[1], r5
-; BE-I64-NEXT:    vrev64.32 d30, d24
-; BE-I64-NEXT:    vrev64.32 d27, d22
-; BE-I64-NEXT:    vldr d22, [sp] @ 8-byte Reload
-; BE-I64-NEXT:    vst1.64 {d0, d1}, [r0:128]!
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
+; BE-I64-NEXT:    vmov.32 d14[1], r5
+; BE-I64-NEXT:    vmov.32 d9[1], r0
+; BE-I64-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
+; BE-I64-NEXT:    vmov.32 d12[1], r7
+; BE-I64-NEXT:    vmov.32 d8[1], r0
+; BE-I64-NEXT:    vmov.32 d13[1], r4
+; BE-I64-NEXT:    vmov.32 d10[1], r6
 ; BE-I64-NEXT:    vmov.32 d11[1], r8
-; BE-I64-NEXT:    vrev64.32 d28, d13
-; BE-I64-NEXT:    vst1.64 {d30, d31}, [r0:128]!
-; BE-I64-NEXT:    vrev64.32 d26, d22
-; BE-I64-NEXT:    vrev64.32 d23, d12
-; BE-I64-NEXT:    vst1.64 {d28, d29}, [r0:128]!
-; BE-I64-NEXT:    vrev64.32 d22, d11
-; BE-I64-NEXT:    vst1.64 {d26, d27}, [r0:128]
-; BE-I64-NEXT:    vst1.64 {d20, d21}, [r4:128]!
-; BE-I64-NEXT:    vst1.64 {d22, d23}, [r4:128]!
-; BE-I64-NEXT:    vst1.64 {d18, d19}, [r4:128]!
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r4:128]
-; BE-I64-NEXT:    add sp, sp, #56
-; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d1, d14
+; BE-I64-NEXT:    vrev64.32 d3, d12
+; BE-I64-NEXT:    vrev64.32 d5, d9
+; BE-I64-NEXT:    vrev64.32 d7, d8
+; BE-I64-NEXT:    vrev64.32 d0, d13
+; BE-I64-NEXT:    vrev64.32 d2, d10
+; BE-I64-NEXT:    vrev64.32 d4, d11
+; BE-I64-NEXT:    vrev64.32 d6, d16
+; BE-I64-NEXT:    add sp, sp, #16
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
 ; BE-I64-NEXT:    add sp, sp, #4
 ; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v16fp128:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I32-NEON-NEXT:    .pad #4
-; BE-I32-NEON-NEXT:    sub sp, sp, #4
-; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    .pad #16
-; BE-I32-NEON-NEXT:    sub sp, sp, #16
-; BE-I32-NEON-NEXT:    stm sp, {r0, r1, r2, r3} @ 16-byte Folded Spill
-; BE-I32-NEON-NEXT:    add r3, sp, #264
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #332
-; BE-I32-NEON-NEXT:    ldr r7, [sp, #328]
-; BE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I32-NEON-NEXT:    ldr r10, [sp, #280]
-; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEON-NEXT:    mov r0, r7
-; BE-I32-NEON-NEXT:    ldr r8, [sp, #168]
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r5, [sp, #344]
-; BE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #348]
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #352]
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #356]
-; BE-I32-NEON-NEXT:    mov r0, r5
-; BE-I32-NEON-NEXT:    ldr r7, [sp, #284]
-; BE-I32-NEON-NEXT:    ldr r4, [sp, #288]
-; BE-I32-NEON-NEXT:    ldr r6, [sp, #172]
-; BE-I32-NEON-NEXT:    ldr r9, [sp, #176]
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #292]
-; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEON-NEXT:    mov r0, r10
-; BE-I32-NEON-NEXT:    mov r1, r7
-; BE-I32-NEON-NEXT:    mov r2, r4
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #180]
-; BE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; BE-I32-NEON-NEXT:    mov r0, r8
-; BE-I32-NEON-NEXT:    mov r1, r6
-; BE-I32-NEON-NEXT:    mov r2, r9
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #232
-; BE-I32-NEON-NEXT:    mov r4, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #136
-; BE-I32-NEON-NEXT:    mov r6, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r5, [sp, #296]
-; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #300]
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #304]
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #308]
-; BE-I32-NEON-NEXT:    mov r0, r5
-; BE-I32-NEON-NEXT:    ldr r10, [sp, #216]
-; BE-I32-NEON-NEXT:    ldr r8, [sp, #220]
-; BE-I32-NEON-NEXT:    ldr r9, [sp, #152]
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r7, [sp, #248]
-; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #252]
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #256]
-; BE-I32-NEON-NEXT:    vmov.32 d8[0], r6
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #260]
-; BE-I32-NEON-NEXT:    mov r0, r7
-; BE-I32-NEON-NEXT:    ldr r5, [sp, #224]
-; BE-I32-NEON-NEXT:    ldr r11, [sp, #120]
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #228]
-; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEON-NEXT:    mov r0, r10
-; BE-I32-NEON-NEXT:    mov r1, r8
-; BE-I32-NEON-NEXT:    mov r2, r5
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #200
-; BE-I32-NEON-NEXT:    mov r5, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I32-NEON-NEXT:    ldr r0, [sp, #184]
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #188]
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #192]
-; BE-I32-NEON-NEXT:    vmov.32 d14[0], r4
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #196]
-; BE-I32-NEON-NEXT:    vmov.32 d15[1], r5
-; BE-I32-NEON-NEXT:    ldr r7, [sp, #156]
-; BE-I32-NEON-NEXT:    ldr r6, [sp, #160]
-; BE-I32-NEON-NEXT:    ldr r4, [sp, #124]
-; BE-I32-NEON-NEXT:    ldr r5, [sp, #128]
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #164]
-; BE-I32-NEON-NEXT:    vmov.32 d14[1], r0
-; BE-I32-NEON-NEXT:    mov r0, r9
-; BE-I32-NEON-NEXT:    mov r1, r7
-; BE-I32-NEON-NEXT:    mov r2, r6
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #132]
-; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; BE-I32-NEON-NEXT:    mov r0, r11
-; BE-I32-NEON-NEXT:    mov r1, r4
-; BE-I32-NEON-NEXT:    mov r2, r5
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    mov r4, r0
-; BE-I32-NEON-NEXT:    ldm sp, {r0, r1, r2, r3} @ 16-byte Folded Reload
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #316
-; BE-I32-NEON-NEXT:    ldr r7, [sp, #312]
-; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEON-NEXT:    mov r0, r7
-; BE-I32-NEON-NEXT:    vmov.32 d12[1], r4
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEON-NEXT:    vrev64.32 q0, q6
-; BE-I32-NEON-NEXT:    vrev64.32 q1, q7
-; BE-I32-NEON-NEXT:    vrev64.32 q2, q4
-; BE-I32-NEON-NEXT:    vrev64.32 q3, q5
-; BE-I32-NEON-NEXT:    add sp, sp, #16
-; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    add sp, sp, #4
-; BE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v16fp128:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEON-NEXT:    .pad #4
-; BE-I64-NEON-NEXT:    sub sp, sp, #4
-; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    .pad #56
-; BE-I64-NEON-NEXT:    sub sp, sp, #56
-; BE-I64-NEON-NEXT:    mov r5, r3
-; BE-I64-NEON-NEXT:    add r3, sp, #376
-; BE-I64-NEON-NEXT:    mov r6, r2
-; BE-I64-NEON-NEXT:    mov r4, r0
-; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    ldr r7, [sp, #392]
-; BE-I64-NEON-NEXT:    add r3, sp, #396
-; BE-I64-NEON-NEXT:    mov r9, r1
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    mov r0, r7
-; BE-I64-NEON-NEXT:    ldr r11, [sp, #168]
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    ldr r2, [sp, #160]
-; BE-I64-NEON-NEXT:    mov r10, r1
-; BE-I64-NEON-NEXT:    ldr r3, [sp, #164]
-; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r6
-; BE-I64-NEON-NEXT:    mov r1, r5
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #172
-; BE-I64-NEON-NEXT:    mov r8, r1
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r11
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #220
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #216]
-; BE-I64-NEON-NEXT:    mov r11, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #236
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #232]
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #252
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #248]
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #268
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #264]
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #280]
-; BE-I64-NEON-NEXT:    ldr r2, [sp, #288]
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r7
-; BE-I64-NEON-NEXT:    ldr r7, [sp, #284]
-; BE-I64-NEON-NEXT:    ldr r3, [sp, #292]
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r5
-; BE-I64-NEON-NEXT:    ldr r5, [sp, #328]
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r6
-; BE-I64-NEON-NEXT:    ldr r6, [sp, #300]
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r8
-; BE-I64-NEON-NEXT:    ldr r8, [sp, #184]
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r11
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r10
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r9
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r1
-; BE-I64-NEON-NEXT:    mov r1, r7
-; BE-I64-NEON-NEXT:    vstr d14, [sp, #48] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d13, [sp, #40] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d12, [sp, #32] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d10, [sp, #16] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d9, [sp, #8] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d8, [sp] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    mov r10, r1
-; BE-I64-NEON-NEXT:    ldr r1, [sp, #296]
-; BE-I64-NEON-NEXT:    ldr r2, [sp, #304]
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    ldr r3, [sp, #308]
-; BE-I64-NEON-NEXT:    mov r0, r1
-; BE-I64-NEON-NEXT:    mov r1, r6
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #332
-; BE-I64-NEON-NEXT:    mov r11, r1
-; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r5
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #188
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r8
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #204
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #200]
-; BE-I64-NEON-NEXT:    mov r8, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #348
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #344]
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #364
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #360]
-; BE-I64-NEON-NEXT:    mov r9, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #316
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #312]
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    vldr d18, [sp, #48] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d17, d15
-; BE-I64-NEON-NEXT:    vrev64.32 d16, d18
-; BE-I64-NEON-NEXT:    vldr d18, [sp, #40] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d24[0], r0
-; BE-I64-NEON-NEXT:    add r0, r4, #64
-; BE-I64-NEON-NEXT:    vldr d20, [sp, #32] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d19, d18
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r11
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r7
-; BE-I64-NEON-NEXT:    vrev64.32 d18, d20
-; BE-I64-NEON-NEXT:    vldr d20, [sp, #24] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r10
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r6
-; BE-I64-NEON-NEXT:    vmov.32 d24[1], r1
-; BE-I64-NEON-NEXT:    vldr d22, [sp, #16] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d21, d20
-; BE-I64-NEON-NEXT:    vrev64.32 d1, d9
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r9
-; BE-I64-NEON-NEXT:    vrev64.32 d31, d10
-; BE-I64-NEON-NEXT:    vrev64.32 d20, d22
-; BE-I64-NEON-NEXT:    vldr d22, [sp, #8] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d0, d8
-; BE-I64-NEON-NEXT:    vrev64.32 d29, d14
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r5
-; BE-I64-NEON-NEXT:    vrev64.32 d30, d24
-; BE-I64-NEON-NEXT:    vrev64.32 d27, d22
-; BE-I64-NEON-NEXT:    vldr d22, [sp] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vst1.64 {d0, d1}, [r0:128]!
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r8
-; BE-I64-NEON-NEXT:    vrev64.32 d28, d13
-; BE-I64-NEON-NEXT:    vst1.64 {d30, d31}, [r0:128]!
-; BE-I64-NEON-NEXT:    vrev64.32 d26, d22
-; BE-I64-NEON-NEXT:    vrev64.32 d23, d12
-; BE-I64-NEON-NEXT:    vst1.64 {d28, d29}, [r0:128]!
-; BE-I64-NEON-NEXT:    vrev64.32 d22, d11
-; BE-I64-NEON-NEXT:    vst1.64 {d26, d27}, [r0:128]
-; BE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r4:128]!
-; BE-I64-NEON-NEXT:    vst1.64 {d22, d23}, [r4:128]!
-; BE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r4:128]!
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r4:128]
-; BE-I64-NEON-NEXT:    add sp, sp, #56
-; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    add sp, sp, #4
-; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-  %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128> %x)
-  ret <16 x iXLen> %a
+  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128> %x)
+  ret <8 x iXLen> %a
 }
-declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>)
+declare <8 x iXLen> @llvm.lrint.v8iXLen.v8fp128(<8 x fp128>)
 
-define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) {
-; LE-I32-LABEL: lrint_v32fp128:
+define <16 x iXLen> @lrint_v16fp128(<16 x fp128> %x) {
+; LE-I32-LABEL: lrint_v16fp128:
 ; LE-I32:       @ %bb.0:
 ; LE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; LE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -10752,258 +2675,126 @@ define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) {
 ; LE-I32-NEXT:    sub sp, sp, #4
 ; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEXT:    .pad #80
-; LE-I32-NEXT:    sub sp, sp, #80
-; LE-I32-NEXT:    str r3, [sp, #16] @ 4-byte Spill
-; LE-I32-NEXT:    add r3, sp, #336
-; LE-I32-NEXT:    str r2, [sp, #12] @ 4-byte Spill
-; LE-I32-NEXT:    mov r9, r0
-; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #244
-; LE-I32-NEXT:    ldr r7, [sp, #240]
-; LE-I32-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEXT:    ldr r5, [sp, #288]
-; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEXT:    mov r0, r7
-; LE-I32-NEXT:    ldr r8, [sp, #352]
-; LE-I32-NEXT:    ldr r11, [sp, #656]
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #292
-; LE-I32-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEXT:    mov r0, r5
-; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #272
-; LE-I32-NEXT:    mov r10, r0
-; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r6, [sp, #256]
-; LE-I32-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEXT:    ldr r1, [sp, #260]
-; LE-I32-NEXT:    ldr r2, [sp, #264]
-; LE-I32-NEXT:    ldr r3, [sp, #268]
-; LE-I32-NEXT:    mov r0, r6
-; LE-I32-NEXT:    ldr r7, [sp, #660]
-; LE-I32-NEXT:    vmov.32 d11[1], r10
-; LE-I32-NEXT:    ldr r5, [sp, #664]
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEXT:    ldr r1, [sp, #356]
-; LE-I32-NEXT:    ldr r2, [sp, #360]
-; LE-I32-NEXT:    add lr, sp, #32
-; LE-I32-NEXT:    ldr r3, [sp, #364]
-; LE-I32-NEXT:    mov r0, r8
-; LE-I32-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r3, [sp, #668]
-; LE-I32-NEXT:    add lr, sp, #48
-; LE-I32-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEXT:    mov r0, r11
-; LE-I32-NEXT:    mov r1, r7
-; LE-I32-NEXT:    mov r2, r5
-; LE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #400
-; LE-I32-NEXT:    mov r8, r0
-; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #592
+; LE-I32-NEXT:    mov r8, r3
+; LE-I32-NEXT:    add r3, sp, #280
+; LE-I32-NEXT:    mov r9, r2
+; LE-I32-NEXT:    mov r10, r1
 ; LE-I32-NEXT:    mov r6, r0
 ; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r4, [sp, #416]
-; LE-I32-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEXT:    ldr r1, [sp, #420]
-; LE-I32-NEXT:    ldr r2, [sp, #424]
-; LE-I32-NEXT:    vmov.32 d13[0], r6
-; LE-I32-NEXT:    ldr r3, [sp, #428]
+; LE-I32-NEXT:    ldr r4, [sp, #216]
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    ldr r1, [sp, #220]
+; LE-I32-NEXT:    ldr r2, [sp, #224]
+; LE-I32-NEXT:    ldr r3, [sp, #228]
 ; LE-I32-NEXT:    mov r0, r4
-; LE-I32-NEXT:    ldr r7, [sp, #224]
-; LE-I32-NEXT:    ldr r10, [sp, #228]
-; LE-I32-NEXT:    ldr r5, [sp, #232]
-; LE-I32-NEXT:    ldr r11, [sp, #464]
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r3, [sp, #236]
-; LE-I32-NEXT:    vmov.32 d13[1], r0
-; LE-I32-NEXT:    mov r0, r7
-; LE-I32-NEXT:    mov r1, r10
-; LE-I32-NEXT:    mov r2, r5
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #208
-; LE-I32-NEXT:    mov r4, r0
-; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEXT:    ldr r0, [sp, #672]
-; LE-I32-NEXT:    ldr r1, [sp, #676]
-; LE-I32-NEXT:    ldr r2, [sp, #680]
-; LE-I32-NEXT:    vmov.32 d11[0], r8
-; LE-I32-NEXT:    ldr r3, [sp, #684]
-; LE-I32-NEXT:    vmov.32 d9[1], r4
-; LE-I32-NEXT:    ldr r7, [sp, #612]
-; LE-I32-NEXT:    ldr r6, [sp, #616]
-; LE-I32-NEXT:    ldr r5, [sp, #468]
-; LE-I32-NEXT:    ldr r4, [sp, #472]
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEXT:    ldr r3, [sp, #620]
-; LE-I32-NEXT:    ldr r0, [sp, #608]
-; LE-I32-NEXT:    mov r1, r7
-; LE-I32-NEXT:    mov r2, r6
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r3, [sp, #476]
-; LE-I32-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEXT:    mov r0, r11
-; LE-I32-NEXT:    mov r1, r5
-; LE-I32-NEXT:    mov r2, r4
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #560
-; LE-I32-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #644
-; LE-I32-NEXT:    ldr r7, [sp, #640]
-; LE-I32-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEXT:    add lr, sp, #64
-; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEXT:    mov r0, r7
-; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #624
-; LE-I32-NEXT:    mov r11, r0
-; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    ldr r7, [sp, #152]
+; LE-I32-NEXT:    ldr r11, [sp, #104]
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #196
-; LE-I32-NEXT:    ldr r7, [sp, #192]
+; LE-I32-NEXT:    add r3, sp, #156
 ; LE-I32-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I32-NEXT:    mov r0, r7
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    mov r6, r0
-; LE-I32-NEXT:    ldr r2, [sp, #184]
-; LE-I32-NEXT:    ldr r3, [sp, #188]
-; LE-I32-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; LE-I32-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #324
-; LE-I32-NEXT:    ldr r7, [sp, #320]
-; LE-I32-NEXT:    vmov.32 d8[0], r0
 ; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEXT:    mov r0, r7
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #304
-; LE-I32-NEXT:    mov r7, r0
-; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add lr, sp, #48
-; LE-I32-NEXT:    ldr r4, [sp, #368]
-; LE-I32-NEXT:    ldr r1, [sp, #372]
-; LE-I32-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-I32-NEXT:    ldr r2, [sp, #376]
-; LE-I32-NEXT:    ldr r3, [sp, #380]
-; LE-I32-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEXT:    mov r0, r4
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r5, [sp, #384]
+; LE-I32-NEXT:    ldr r7, [sp, #184]
 ; LE-I32-NEXT:    vmov.32 d12[0], r0
-; LE-I32-NEXT:    ldr r1, [sp, #388]
-; LE-I32-NEXT:    ldr r2, [sp, #392]
-; LE-I32-NEXT:    ldr r3, [sp, #396]
-; LE-I32-NEXT:    mov r0, r5
-; LE-I32-NEXT:    ldr r4, [sp, #432]
+; LE-I32-NEXT:    ldr r1, [sp, #188]
+; LE-I32-NEXT:    ldr r2, [sp, #192]
+; LE-I32-NEXT:    ldr r3, [sp, #196]
+; LE-I32-NEXT:    mov r0, r7
+; LE-I32-NEXT:    ldr r4, [sp, #120]
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    vmov.32 d12[1], r0
-; LE-I32-NEXT:    ldr r1, [sp, #436]
-; LE-I32-NEXT:    ldr r2, [sp, #440]
-; LE-I32-NEXT:    add lr, sp, #16
-; LE-I32-NEXT:    ldr r3, [sp, #444]
+; LE-I32-NEXT:    add r3, sp, #124
+; LE-I32-NEXT:    vmov.32 d13[0], r0
 ; LE-I32-NEXT:    mov r0, r4
-; LE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    vmov.32 d12[0], r0
-; LE-I32-NEXT:    ldr r0, [sp, #576]
-; LE-I32-NEXT:    ldr r1, [sp, #580]
-; LE-I32-NEXT:    add lr, sp, #48
-; LE-I32-NEXT:    vmov.32 d14[1], r7
-; LE-I32-NEXT:    ldr r2, [sp, #584]
-; LE-I32-NEXT:    ldr r3, [sp, #588]
-; LE-I32-NEXT:    vmov.32 d10[1], r11
-; LE-I32-NEXT:    ldr r8, [sp, #448]
-; LE-I32-NEXT:    ldr r4, [sp, #544]
-; LE-I32-NEXT:    ldr r10, [sp, #548]
-; LE-I32-NEXT:    vmov.32 d8[1], r6
-; LE-I32-NEXT:    ldr r7, [sp, #552]
-; LE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I32-NEXT:    ldr r11, [sp, #512]
+; LE-I32-NEXT:    ldr r5, [sp, #136]
+; LE-I32-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEXT:    ldr r1, [sp, #140]
+; LE-I32-NEXT:    ldr r2, [sp, #144]
+; LE-I32-NEXT:    ldr r3, [sp, #148]
+; LE-I32-NEXT:    mov r0, r5
+; LE-I32-NEXT:    ldr r4, [sp, #108]
+; LE-I32-NEXT:    ldr r7, [sp, #112]
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add lr, sp, #64
-; LE-I32-NEXT:    ldr r3, [sp, #556]
-; LE-I32-NEXT:    mov r1, r10
-; LE-I32-NEXT:    mov r2, r7
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    vmov.32 d16[1], r0
-; LE-I32-NEXT:    mov r0, r4
-; LE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I32-NEXT:    ldr r3, [sp, #116]
+; LE-I32-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEXT:    mov r0, r11
+; LE-I32-NEXT:    mov r1, r4
+; LE-I32-NEXT:    mov r2, r7
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #528
 ; LE-I32-NEXT:    mov r4, r0
-; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    mov r0, r6
+; LE-I32-NEXT:    mov r1, r10
+; LE-I32-NEXT:    mov r2, r9
+; LE-I32-NEXT:    mov r3, r8
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
-; LE-I32-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEXT:    ldr r0, [sp, #480]
-; LE-I32-NEXT:    ldr r2, [sp, #488]
-; LE-I32-NEXT:    vmov.32 d13[0], r1
-; LE-I32-NEXT:    ldr r1, [sp, #484]
-; LE-I32-NEXT:    ldr r3, [sp, #492]
-; LE-I32-NEXT:    vmov.32 d15[1], r4
-; LE-I32-NEXT:    ldr r7, [sp, #452]
-; LE-I32-NEXT:    ldr r5, [sp, #456]
-; LE-I32-NEXT:    ldr r6, [sp, #516]
-; LE-I32-NEXT:    ldr r4, [sp, #520]
+; LE-I32-NEXT:    ldr r7, [sp, #200]
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    ldr r1, [sp, #204]
+; LE-I32-NEXT:    ldr r2, [sp, #208]
+; LE-I32-NEXT:    ldr r3, [sp, #212]
+; LE-I32-NEXT:    mov r0, r7
+; LE-I32-NEXT:    ldr r5, [sp, #172]
+; LE-I32-NEXT:    vmov.32 d14[1], r4
+; LE-I32-NEXT:    ldr r6, [sp, #176]
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r3, [sp, #460]
 ; LE-I32-NEXT:    vmov.32 d13[1], r0
-; LE-I32-NEXT:    mov r0, r8
-; LE-I32-NEXT:    mov r1, r7
-; LE-I32-NEXT:    mov r2, r5
+; LE-I32-NEXT:    ldr r3, [sp, #180]
+; LE-I32-NEXT:    ldr r0, [sp, #168]
+; LE-I32-NEXT:    mov r1, r5
+; LE-I32-NEXT:    mov r2, r6
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    ldr r3, [sp, #524]
-; LE-I32-NEXT:    vmov.32 d12[1], r0
-; LE-I32-NEXT:    mov r0, r11
+; LE-I32-NEXT:    add r3, sp, #248
+; LE-I32-NEXT:    mov r5, r0
+; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    ldr r4, [sp, #264]
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    ldr r1, [sp, #268]
+; LE-I32-NEXT:    ldr r2, [sp, #272]
+; LE-I32-NEXT:    vmov.32 d12[1], r5
+; LE-I32-NEXT:    ldr r3, [sp, #276]
+; LE-I32-NEXT:    mov r0, r4
+; LE-I32-NEXT:    ldr r6, [sp, #236]
+; LE-I32-NEXT:    ldr r7, [sp, #240]
+; LE-I32-NEXT:    ldr r8, [sp, #332]
+; LE-I32-NEXT:    ldr r5, [sp, #336]
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d11[1], r0
+; LE-I32-NEXT:    ldr r3, [sp, #244]
+; LE-I32-NEXT:    ldr r0, [sp, #232]
 ; LE-I32-NEXT:    mov r1, r6
-; LE-I32-NEXT:    mov r2, r4
+; LE-I32-NEXT:    mov r2, r7
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    ldr r3, [sp, #340]
+; LE-I32-NEXT:    ldr r0, [sp, #328]
+; LE-I32-NEXT:    mov r1, r8
+; LE-I32-NEXT:    mov r2, r5
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    add r3, sp, #496
+; LE-I32-NEXT:    add r3, sp, #312
 ; LE-I32-NEXT:    mov r4, r0
 ; LE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; LE-I32-NEXT:    bl lrintl
-; LE-I32-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEXT:    add r0, r9, #64
-; LE-I32-NEXT:    add lr, sp, #64
-; LE-I32-NEXT:    vst1.32 {d12, d13}, [r0:128]!
-; LE-I32-NEXT:    vmov.32 d14[1], r4
-; LE-I32-NEXT:    vst1.32 {d14, d15}, [r0:128]!
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    add lr, sp, #32
-; LE-I32-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEXT:    vst1.64 {d10, d11}, [r0:128]
-; LE-I32-NEXT:    vst1.32 {d8, d9}, [r9:128]!
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    add lr, sp, #48
-; LE-I32-NEXT:    vst1.32 {d16, d17}, [r9:128]!
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    add lr, sp, #16
-; LE-I32-NEXT:    vst1.32 {d16, d17}, [r9:128]!
-; LE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEXT:    vst1.64 {d16, d17}, [r9:128]
-; LE-I32-NEXT:    add sp, sp, #80
+; LE-I32-NEXT:    add r3, sp, #300
+; LE-I32-NEXT:    ldr r7, [sp, #296]
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I32-NEXT:    mov r0, r7
+; LE-I32-NEXT:    vmov.32 d9[1], r4
+; LE-I32-NEXT:    bl lrintl
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    vorr q0, q7, q7
+; LE-I32-NEXT:    vorr q1, q6, q6
+; LE-I32-NEXT:    vorr q2, q5, q5
+; LE-I32-NEXT:    vorr q3, q4, q4
 ; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-I32-NEXT:    add sp, sp, #4
 ; LE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
-; LE-I64-LABEL: lrint_v32fp128:
+; LE-I64-LABEL: lrint_v16fp128:
 ; LE-I64:       @ %bb.0:
 ; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -11011,988 +2802,162 @@ define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) {
 ; LE-I64-NEXT:    sub sp, sp, #4
 ; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEXT:    .pad #192
-; LE-I64-NEXT:    sub sp, sp, #192
-; LE-I64-NEXT:    str r3, [sp, #60] @ 4-byte Spill
-; LE-I64-NEXT:    add r3, sp, #688
-; LE-I64-NEXT:    str r2, [sp, #56] @ 4-byte Spill
-; LE-I64-NEXT:    mov r9, r0
-; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #560
+; LE-I64-NEXT:    .pad #72
+; LE-I64-NEXT:    sub sp, sp, #72
+; LE-I64-NEXT:    mov r6, r3
+; LE-I64-NEXT:    add r3, sp, #408
+; LE-I64-NEXT:    mov r7, r2
 ; LE-I64-NEXT:    mov r4, r0
-; LE-I64-NEXT:    str r1, [sp, #64] @ 4-byte Spill
 ; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEXT:    ldr r7, [sp, #544]
-; LE-I64-NEXT:    ldr r6, [sp, #548]
-; LE-I64-NEXT:    add lr, sp, #96
-; LE-I64-NEXT:    ldr r2, [sp, #552]
-; LE-I64-NEXT:    vmov.32 d17[1], r1
-; LE-I64-NEXT:    ldr r3, [sp, #556]
+; LE-I64-NEXT:    add r5, sp, #176
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
 ; LE-I64-NEXT:    mov r0, r7
+; LE-I64-NEXT:    ldm r5, {r2, r3, r5}
 ; LE-I64-NEXT:    mov r1, r6
-; LE-I64-NEXT:    vorr q4, q8, q8
-; LE-I64-NEXT:    ldr r5, [sp, #528]
-; LE-I64-NEXT:    vmov.32 d17[0], r4
-; LE-I64-NEXT:    ldr r10, [sp, #304]
-; LE-I64-NEXT:    ldr r8, [sp, #368]
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    ldr r8, [sp, #232]
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #532
+; LE-I64-NEXT:    add r3, sp, #188
+; LE-I64-NEXT:    mov r9, r1
 ; LE-I64-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEXT:    mov r11, r1
-; LE-I64-NEXT:    add lr, sp, #144
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I64-NEXT:    mov r0, r5
-; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #308
-; LE-I64-NEXT:    mov r5, r1
-; LE-I64-NEXT:    add lr, sp, #176
-; LE-I64-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    mov r0, r10
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #372
-; LE-I64-NEXT:    mov r10, r1
-; LE-I64-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEXT:    mov r0, r8
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #404
-; LE-I64-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #400]
-; LE-I64-NEXT:    mov r6, r1
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #596
+; LE-I64-NEXT:    add r3, sp, #236
+; LE-I64-NEXT:    mov r11, r1
 ; LE-I64-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #592]
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #676
-; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #672]
-; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    mov r0, r8
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add lr, sp, #96
-; LE-I64-NEXT:    vmov.32 d13[1], r4
-; LE-I64-NEXT:    str r1, [sp, #52] @ 4-byte Spill
-; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #80
-; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #128
-; LE-I64-NEXT:    vmov.32 d9[1], r7
-; LE-I64-NEXT:    ldr r1, [sp, #628]
-; LE-I64-NEXT:    ldr r2, [sp, #632]
-; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #112
-; LE-I64-NEXT:    vmov.32 d15[1], r6
-; LE-I64-NEXT:    ldr r3, [sp, #636]
-; LE-I64-NEXT:    ldr r7, [sp, #64] @ 4-byte Reload
-; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #160
-; LE-I64-NEXT:    vmov.32 d11[1], r10
-; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #176
-; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #144
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.32 d18[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #624]
-; LE-I64-NEXT:    vmov.32 d16[1], r11
-; LE-I64-NEXT:    vmov.32 d9[1], r5
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #96
-; LE-I64-NEXT:    vmov.32 d19[1], r7
-; LE-I64-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #512
-; LE-I64-NEXT:    str r0, [sp, #48] @ 4-byte Spill
-; LE-I64-NEXT:    str r1, [sp, #64] @ 4-byte Spill
-; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #768
-; LE-I64-NEXT:    mov r11, r0
-; LE-I64-NEXT:    str r1, [sp, #28] @ 4-byte Spill
-; LE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    ldr r6, [sp, #784]
-; LE-I64-NEXT:    add r3, sp, #788
+; LE-I64-NEXT:    add r3, sp, #252
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #248]
 ; LE-I64-NEXT:    mov r8, r1
-; LE-I64-NEXT:    vmov.32 d14[0], r0
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    mov r0, r6
-; LE-I64-NEXT:    ldr r5, [sp, #736]
-; LE-I64-NEXT:    ldr r7, [sp, #752]
-; LE-I64-NEXT:    ldr r4, [sp, #720]
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #740
+; LE-I64-NEXT:    add r3, sp, #268
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #264]
 ; LE-I64-NEXT:    mov r6, r1
-; LE-I64-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEXT:    mov r0, r5
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #756
-; LE-I64-NEXT:    mov r5, r1
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    mov r0, r7
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #724
+; LE-I64-NEXT:    add r3, sp, #284
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #280]
 ; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    mov r0, r4
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    vmov.32 d13[1], r7
-; LE-I64-NEXT:    add lr, sp, #8
-; LE-I64-NEXT:    ldr r2, [sp, #296]
-; LE-I64-NEXT:    vmov.32 d12[1], r5
-; LE-I64-NEXT:    ldr r3, [sp, #300]
-; LE-I64-NEXT:    ldr r4, [sp, #576]
-; LE-I64-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
-; LE-I64-NEXT:    ldr r10, [sp, #384]
-; LE-I64-NEXT:    vmov.32 d15[1], r6
-; LE-I64-NEXT:    ldr r6, [sp, #352]
-; LE-I64-NEXT:    vmov.32 d14[1], r8
-; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #32
-; LE-I64-NEXT:    vmov.32 d11[1], r1
-; LE-I64-NEXT:    ldr r1, [sp, #60] @ 4-byte Reload
-; LE-I64-NEXT:    vmov.32 d8[0], r11
-; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #176
-; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add lr, sp, #160
-; LE-I64-NEXT:    add r3, sp, #356
+; LE-I64-NEXT:    add r3, sp, #316
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #312]
 ; LE-I64-NEXT:    mov r5, r1
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    vmov.32 d16[0], r0
-; LE-I64-NEXT:    mov r0, r6
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add lr, sp, #112
-; LE-I64-NEXT:    add r3, sp, #388
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEXT:    mov r0, r10
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add lr, sp, #128
-; LE-I64-NEXT:    add r3, sp, #580
-; LE-I64-NEXT:    mov r6, r1
-; LE-I64-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    mov r0, r4
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add lr, sp, #80
-; LE-I64-NEXT:    add r3, sp, #708
-; LE-I64-NEXT:    mov r4, r1
-; LE-I64-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #704]
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    vmov.32 d8[1], r4
-; LE-I64-NEXT:    add lr, sp, #80
-; LE-I64-NEXT:    ldr r2, [sp, #52] @ 4-byte Reload
-; LE-I64-NEXT:    vmov.32 d12[1], r6
-; LE-I64-NEXT:    ldr r6, [sp, #644]
-; LE-I64-NEXT:    ldr r3, [sp, #652]
-; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #128
+; LE-I64-NEXT:    vmov.32 d15[1], r5
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    ldr r5, [sp, #300]
 ; LE-I64-NEXT:    vmov.32 d14[1], r7
-; LE-I64-NEXT:    ldr r4, [sp, #480]
-; LE-I64-NEXT:    ldr r7, [sp, #656]
-; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #112
+; LE-I64-NEXT:    ldr r2, [sp, #304]
+; LE-I64-NEXT:    ldr r3, [sp, #308]
+; LE-I64-NEXT:    vmov.32 d11[1], r6
+; LE-I64-NEXT:    ldr r6, [sp, #200]
+; LE-I64-NEXT:    ldr r7, [sp, #204]
+; LE-I64-NEXT:    vmov.32 d10[1], r8
+; LE-I64-NEXT:    ldr r8, [sp, #344]
+; LE-I64-NEXT:    vmov.32 d9[1], r11
+; LE-I64-NEXT:    ldr r11, [sp, #216]
 ; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #160
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vmov.32 d17[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #296]
+; LE-I64-NEXT:    vmov.32 d8[1], r9
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vorr q5, q8, q8
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    vorr q4, q6, q6
+; LE-I64-NEXT:    vmov.32 d11[1], r1
+; LE-I64-NEXT:    mov r1, r5
+; LE-I64-NEXT:    vmov.32 d9[1], r10
+; LE-I64-NEXT:    bl lrintl
 ; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
-; LE-I64-NEXT:    ldr r10, [sp, #496]
-; LE-I64-NEXT:    vmov.32 d16[1], r5
-; LE-I64-NEXT:    add r5, r9, #192
-; LE-I64-NEXT:    ldr r8, [sp, #608]
-; LE-I64-NEXT:    vmov.32 d10[1], r1
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #176
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    vmov.32 d16[1], r0
-; LE-I64-NEXT:    ldr r0, [sp, #640]
-; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #96
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    ldr r2, [sp, #208]
+; LE-I64-NEXT:    ldr r3, [sp, #212]
 ; LE-I64-NEXT:    add lr, sp, #8
-; LE-I64-NEXT:    vmov.32 d16[1], r2
-; LE-I64-NEXT:    ldr r2, [sp, #648]
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; LE-I64-NEXT:    vst1.64 {d10, d11}, [r5:128]!
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; LE-I64-NEXT:    ldr r1, [sp, #48] @ 4-byte Reload
-; LE-I64-NEXT:    vmov.32 d9[0], r1
-; LE-I64-NEXT:    mov r1, r6
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #660
-; LE-I64-NEXT:    mov r11, r1
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    mov r0, r7
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #484
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    mov r0, r4
-; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    mov r0, r6
+; LE-I64-NEXT:    mov r1, r7
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #500
-; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    add r3, sp, #220
+; LE-I64-NEXT:    mov r10, r1
 ; LE-I64-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEXT:    mov r0, r10
+; LE-I64-NEXT:    mov r0, r11
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #612
-; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    add r3, sp, #348
+; LE-I64-NEXT:    mov r11, r1
 ; LE-I64-NEXT:    vmov.32 d11[0], r0
 ; LE-I64-NEXT:    mov r0, r8
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #64] @ 4-byte Reload
-; LE-I64-NEXT:    add lr, sp, #96
-; LE-I64-NEXT:    add r8, r9, #128
-; LE-I64-NEXT:    vmov.32 d13[1], r7
-; LE-I64-NEXT:    ldr r2, [sp, #344]
-; LE-I64-NEXT:    ldr r3, [sp, #348]
-; LE-I64-NEXT:    vmov.32 d12[1], r11
-; LE-I64-NEXT:    ldr r7, [sp, #452]
-; LE-I64-NEXT:    ldr r10, [sp, #416]
-; LE-I64-NEXT:    vmov.32 d9[1], r0
-; LE-I64-NEXT:    ldr r0, [sp, #336]
-; LE-I64-NEXT:    vmov.32 d8[1], r1
-; LE-I64-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #64
-; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEXT:    add lr, sp, #32
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #144
-; LE-I64-NEXT:    vmov.32 d11[1], r4
-; LE-I64-NEXT:    ldr r4, [sp, #340]
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r5:128]
-; LE-I64-NEXT:    mov r1, r4
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #80
-; LE-I64-NEXT:    vmov.32 d10[1], r6
-; LE-I64-NEXT:    ldr r6, [sp, #448]
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    ldr r2, [sp, #456]
-; LE-I64-NEXT:    mov r11, r1
-; LE-I64-NEXT:    ldr r3, [sp, #460]
-; LE-I64-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEXT:    mov r0, r6
-; LE-I64-NEXT:    mov r1, r7
-; LE-I64-NEXT:    ldr r5, [sp, #432]
-; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #468
-; LE-I64-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #464]
-; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    add r3, sp, #364
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #360]
+; LE-I64-NEXT:    mov r8, r1
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #420
-; LE-I64-NEXT:    mov r7, r1
-; LE-I64-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEXT:    mov r0, r10
+; LE-I64-NEXT:    add r3, sp, #380
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #376]
+; LE-I64-NEXT:    mov r5, r1
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #436
-; LE-I64-NEXT:    mov r4, r1
-; LE-I64-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEXT:    mov r0, r5
+; LE-I64-NEXT:    add r3, sp, #396
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #392]
+; LE-I64-NEXT:    mov r6, r1
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add r3, sp, #324
-; LE-I64-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEXT:    ldr r0, [sp, #320]
-; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    add r3, sp, #332
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #328]
+; LE-I64-NEXT:    mov r7, r1
 ; LE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; LE-I64-NEXT:    bl lrintl
-; LE-I64-NEXT:    add lr, sp, #64
-; LE-I64-NEXT:    vmov.32 d9[1], r5
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #96
-; LE-I64-NEXT:    vmov.32 d13[1], r7
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #176
-; LE-I64-NEXT:    vmov.32 d8[1], r4
-; LE-I64-NEXT:    vmov.32 d12[1], r6
-; LE-I64-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEXT:    add r0, r9, #64
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r8:128]
-; LE-I64-NEXT:    vst1.64 {d8, d9}, [r0:128]!
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    add r0, r4, #64
+; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vmov.32 d13[1], r8
+; LE-I64-NEXT:    vmov.32 d18[1], r9
+; LE-I64-NEXT:    vmov.32 d15[1], r6
+; LE-I64-NEXT:    vmov.32 d12[1], r1
+; LE-I64-NEXT:    vmov.32 d14[1], r5
+; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
 ; LE-I64-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-I64-NEXT:    vst1.64 {d10, d11}, [r0:128]!
-; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #160
-; LE-I64-NEXT:    vmov.32 d15[1], r11
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]
+; LE-I64-NEXT:    vmov.32 d8[1], r7
+; LE-I64-NEXT:    vst1.64 {d14, d15}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d8, d9}, [r0:128]
+; LE-I64-NEXT:    vmov.32 d11[1], r11
 ; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #112
-; LE-I64-NEXT:    vmov.32 d14[1], r1
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r9:128]!
-; LE-I64-NEXT:    vst1.64 {d14, d15}, [r9:128]!
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vmov.32 d10[1], r10
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r4:128]!
+; LE-I64-NEXT:    vst1.64 {d10, d11}, [r4:128]!
 ; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    add lr, sp, #128
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r9:128]!
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r4:128]!
 ; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEXT:    vst1.64 {d16, d17}, [r9:128]
-; LE-I64-NEXT:    add sp, sp, #192
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r4:128]
+; LE-I64-NEXT:    add sp, sp, #72
 ; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; LE-I64-NEXT:    add sp, sp, #4
 ; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
-; LE-I32-NEON-LABEL: lrint_v32fp128:
-; LE-I32-NEON:       @ %bb.0:
-; LE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I32-NEON-NEXT:    .pad #4
-; LE-I32-NEON-NEXT:    sub sp, sp, #4
-; LE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    .pad #80
-; LE-I32-NEON-NEXT:    sub sp, sp, #80
-; LE-I32-NEON-NEXT:    str r3, [sp, #16] @ 4-byte Spill
-; LE-I32-NEON-NEXT:    add r3, sp, #336
-; LE-I32-NEON-NEXT:    str r2, [sp, #12] @ 4-byte Spill
-; LE-I32-NEON-NEXT:    mov r9, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #244
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #240]
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    ldr r5, [sp, #288]
-; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEON-NEXT:    mov r0, r7
-; LE-I32-NEON-NEXT:    ldr r8, [sp, #352]
-; LE-I32-NEON-NEXT:    ldr r11, [sp, #656]
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #292
-; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEON-NEXT:    mov r0, r5
-; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #272
-; LE-I32-NEON-NEXT:    mov r10, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r6, [sp, #256]
-; LE-I32-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #260]
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #264]
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #268]
-; LE-I32-NEON-NEXT:    mov r0, r6
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #660]
-; LE-I32-NEON-NEXT:    vmov.32 d11[1], r10
-; LE-I32-NEON-NEXT:    ldr r5, [sp, #664]
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #356]
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #360]
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #364]
-; LE-I32-NEON-NEXT:    mov r0, r8
-; LE-I32-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #668]
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-I32-NEON-NEXT:    mov r0, r11
-; LE-I32-NEON-NEXT:    mov r1, r7
-; LE-I32-NEON-NEXT:    mov r2, r5
-; LE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #400
-; LE-I32-NEON-NEXT:    mov r8, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #592
-; LE-I32-NEON-NEXT:    mov r6, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r4, [sp, #416]
-; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #420]
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #424]
-; LE-I32-NEON-NEXT:    vmov.32 d13[0], r6
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #428]
-; LE-I32-NEON-NEXT:    mov r0, r4
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #224]
-; LE-I32-NEON-NEXT:    ldr r10, [sp, #228]
-; LE-I32-NEON-NEXT:    ldr r5, [sp, #232]
-; LE-I32-NEON-NEXT:    ldr r11, [sp, #464]
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #236]
-; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; LE-I32-NEON-NEXT:    mov r0, r7
-; LE-I32-NEON-NEXT:    mov r1, r10
-; LE-I32-NEON-NEXT:    mov r2, r5
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #208
-; LE-I32-NEON-NEXT:    mov r4, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I32-NEON-NEXT:    ldr r0, [sp, #672]
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #676]
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #680]
-; LE-I32-NEON-NEXT:    vmov.32 d11[0], r8
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #684]
-; LE-I32-NEON-NEXT:    vmov.32 d9[1], r4
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #612]
-; LE-I32-NEON-NEXT:    ldr r6, [sp, #616]
-; LE-I32-NEON-NEXT:    ldr r5, [sp, #468]
-; LE-I32-NEON-NEXT:    ldr r4, [sp, #472]
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #620]
-; LE-I32-NEON-NEXT:    ldr r0, [sp, #608]
-; LE-I32-NEON-NEXT:    mov r1, r7
-; LE-I32-NEON-NEXT:    mov r2, r6
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #476]
-; LE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; LE-I32-NEON-NEXT:    mov r0, r11
-; LE-I32-NEON-NEXT:    mov r1, r5
-; LE-I32-NEON-NEXT:    mov r2, r4
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #560
-; LE-I32-NEON-NEXT:    str r0, [sp, #8] @ 4-byte Spill
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #644
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #640]
-; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEON-NEXT:    mov r0, r7
-; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #624
-; LE-I32-NEON-NEXT:    mov r11, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #196
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #192]
-; LE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEON-NEXT:    mov r0, r7
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    mov r6, r0
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #184]
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #188]
-; LE-I32-NEON-NEXT:    ldr r0, [sp, #12] @ 4-byte Reload
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #324
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #320]
-; LE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I32-NEON-NEXT:    mov r0, r7
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #304
-; LE-I32-NEON-NEXT:    mov r7, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    ldr r4, [sp, #368]
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #372]
-; LE-I32-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #376]
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #380]
-; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEON-NEXT:    mov r0, r4
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r5, [sp, #384]
-; LE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #388]
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #392]
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #396]
-; LE-I32-NEON-NEXT:    mov r0, r5
-; LE-I32-NEON-NEXT:    ldr r4, [sp, #432]
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #436]
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #440]
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #444]
-; LE-I32-NEON-NEXT:    mov r0, r4
-; LE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I32-NEON-NEXT:    ldr r0, [sp, #576]
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #580]
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    vmov.32 d14[1], r7
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #584]
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #588]
-; LE-I32-NEON-NEXT:    vmov.32 d10[1], r11
-; LE-I32-NEON-NEXT:    ldr r8, [sp, #448]
-; LE-I32-NEON-NEXT:    ldr r4, [sp, #544]
-; LE-I32-NEON-NEXT:    ldr r10, [sp, #548]
-; LE-I32-NEON-NEXT:    vmov.32 d8[1], r6
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #552]
-; LE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    ldr r11, [sp, #512]
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #556]
-; LE-I32-NEON-NEXT:    mov r1, r10
-; LE-I32-NEON-NEXT:    mov r2, r7
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vmov.32 d16[1], r0
-; LE-I32-NEON-NEXT:    mov r0, r4
-; LE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #528
-; LE-I32-NEON-NEXT:    mov r4, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #8] @ 4-byte Reload
-; LE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I32-NEON-NEXT:    ldr r0, [sp, #480]
-; LE-I32-NEON-NEXT:    ldr r2, [sp, #488]
-; LE-I32-NEON-NEXT:    vmov.32 d13[0], r1
-; LE-I32-NEON-NEXT:    ldr r1, [sp, #484]
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #492]
-; LE-I32-NEON-NEXT:    vmov.32 d15[1], r4
-; LE-I32-NEON-NEXT:    ldr r7, [sp, #452]
-; LE-I32-NEON-NEXT:    ldr r5, [sp, #456]
-; LE-I32-NEON-NEXT:    ldr r6, [sp, #516]
-; LE-I32-NEON-NEXT:    ldr r4, [sp, #520]
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #460]
-; LE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; LE-I32-NEON-NEXT:    mov r0, r8
-; LE-I32-NEON-NEXT:    mov r1, r7
-; LE-I32-NEON-NEXT:    mov r2, r5
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    ldr r3, [sp, #524]
-; LE-I32-NEON-NEXT:    vmov.32 d12[1], r0
-; LE-I32-NEON-NEXT:    mov r0, r11
-; LE-I32-NEON-NEXT:    mov r1, r6
-; LE-I32-NEON-NEXT:    mov r2, r4
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    add r3, sp, #496
-; LE-I32-NEON-NEXT:    mov r4, r0
-; LE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I32-NEON-NEXT:    bl lrintl
-; LE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I32-NEON-NEXT:    add r0, r9, #64
-; LE-I32-NEON-NEXT:    add lr, sp, #64
-; LE-I32-NEON-NEXT:    vst1.32 {d12, d13}, [r0:128]!
-; LE-I32-NEON-NEXT:    vmov.32 d14[1], r4
-; LE-I32-NEON-NEXT:    vst1.32 {d14, d15}, [r0:128]!
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    add lr, sp, #32
-; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r0:128]!
-; LE-I32-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]
-; LE-I32-NEON-NEXT:    vst1.32 {d8, d9}, [r9:128]!
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    add lr, sp, #48
-; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r9:128]!
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    add lr, sp, #16
-; LE-I32-NEON-NEXT:    vst1.32 {d16, d17}, [r9:128]!
-; LE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I32-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]
-; LE-I32-NEON-NEXT:    add sp, sp, #80
-; LE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I32-NEON-NEXT:    add sp, sp, #4
-; LE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; LE-I64-NEON-LABEL: lrint_v32fp128:
-; LE-I64-NEON:       @ %bb.0:
-; LE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; LE-I64-NEON-NEXT:    .pad #4
-; LE-I64-NEON-NEXT:    sub sp, sp, #4
-; LE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    .pad #192
-; LE-I64-NEON-NEXT:    sub sp, sp, #192
-; LE-I64-NEON-NEXT:    str r3, [sp, #60] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    add r3, sp, #688
-; LE-I64-NEON-NEXT:    str r2, [sp, #56] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    mov r9, r0
-; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #560
-; LE-I64-NEON-NEXT:    mov r4, r0
-; LE-I64-NEON-NEXT:    str r1, [sp, #64] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEON-NEXT:    ldr r7, [sp, #544]
-; LE-I64-NEON-NEXT:    ldr r6, [sp, #548]
-; LE-I64-NEON-NEXT:    add lr, sp, #96
-; LE-I64-NEON-NEXT:    ldr r2, [sp, #552]
-; LE-I64-NEON-NEXT:    vmov.32 d17[1], r1
-; LE-I64-NEON-NEXT:    ldr r3, [sp, #556]
-; LE-I64-NEON-NEXT:    mov r0, r7
-; LE-I64-NEON-NEXT:    mov r1, r6
-; LE-I64-NEON-NEXT:    vorr q4, q8, q8
-; LE-I64-NEON-NEXT:    ldr r5, [sp, #528]
-; LE-I64-NEON-NEXT:    vmov.32 d17[0], r4
-; LE-I64-NEON-NEXT:    ldr r10, [sp, #304]
-; LE-I64-NEON-NEXT:    ldr r8, [sp, #368]
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #532
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    mov r11, r1
-; LE-I64-NEON-NEXT:    add lr, sp, #144
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    mov r0, r5
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #308
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    add lr, sp, #176
-; LE-I64-NEON-NEXT:    vmov.32 d17[0], r0
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    mov r0, r10
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #372
-; LE-I64-NEON-NEXT:    mov r10, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r8
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #404
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #400]
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #596
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #592]
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #676
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #672]
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add lr, sp, #96
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r4
-; LE-I64-NEON-NEXT:    str r1, [sp, #52] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #80
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #128
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r7
-; LE-I64-NEON-NEXT:    ldr r1, [sp, #628]
-; LE-I64-NEON-NEXT:    ldr r2, [sp, #632]
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #112
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r6
-; LE-I64-NEON-NEXT:    ldr r3, [sp, #636]
-; LE-I64-NEON-NEXT:    ldr r7, [sp, #64] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #160
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r10
-; LE-I64-NEON-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #176
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #144
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d18[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #624]
-; LE-I64-NEON-NEXT:    vmov.32 d16[1], r11
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r5
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #96
-; LE-I64-NEON-NEXT:    vmov.32 d19[1], r7
-; LE-I64-NEON-NEXT:    vstmia lr, {d18, d19} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #512
-; LE-I64-NEON-NEXT:    str r0, [sp, #48] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    str r1, [sp, #64] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #768
-; LE-I64-NEON-NEXT:    mov r11, r0
-; LE-I64-NEON-NEXT:    str r1, [sp, #28] @ 4-byte Spill
-; LE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    ldr r6, [sp, #784]
-; LE-I64-NEON-NEXT:    add r3, sp, #788
-; LE-I64-NEON-NEXT:    mov r8, r1
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    mov r0, r6
-; LE-I64-NEON-NEXT:    ldr r5, [sp, #736]
-; LE-I64-NEON-NEXT:    ldr r7, [sp, #752]
-; LE-I64-NEON-NEXT:    ldr r4, [sp, #720]
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #740
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r5
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #756
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r7
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #724
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r4
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r7
-; LE-I64-NEON-NEXT:    add lr, sp, #8
-; LE-I64-NEON-NEXT:    ldr r2, [sp, #296]
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r5
-; LE-I64-NEON-NEXT:    ldr r3, [sp, #300]
-; LE-I64-NEON-NEXT:    ldr r4, [sp, #576]
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    ldr r10, [sp, #384]
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r6
-; LE-I64-NEON-NEXT:    ldr r6, [sp, #352]
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r8
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #32
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r1
-; LE-I64-NEON-NEXT:    ldr r1, [sp, #60] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r11
-; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #176
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add lr, sp, #160
-; LE-I64-NEON-NEXT:    add r3, sp, #356
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    vmov.32 d16[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r6
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add lr, sp, #112
-; LE-I64-NEON-NEXT:    add r3, sp, #388
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vldmia lr, {d14, d15} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r10
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add lr, sp, #128
-; LE-I64-NEON-NEXT:    add r3, sp, #580
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r4
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add lr, sp, #80
-; LE-I64-NEON-NEXT:    add r3, sp, #708
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #704]
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r4
-; LE-I64-NEON-NEXT:    add lr, sp, #80
-; LE-I64-NEON-NEXT:    ldr r2, [sp, #52] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r6
-; LE-I64-NEON-NEXT:    ldr r6, [sp, #644]
-; LE-I64-NEON-NEXT:    ldr r3, [sp, #652]
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #128
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r7
-; LE-I64-NEON-NEXT:    ldr r4, [sp, #480]
-; LE-I64-NEON-NEXT:    ldr r7, [sp, #656]
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #112
-; LE-I64-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #160
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #28] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    ldr r10, [sp, #496]
-; LE-I64-NEON-NEXT:    vmov.32 d16[1], r5
-; LE-I64-NEON-NEXT:    add r5, r9, #192
-; LE-I64-NEON-NEXT:    ldr r8, [sp, #608]
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r1
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #176
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d16[1], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #640]
-; LE-I64-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #96
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #8
-; LE-I64-NEON-NEXT:    vmov.32 d16[1], r2
-; LE-I64-NEON-NEXT:    ldr r2, [sp, #648]
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d10, d11}, [r5:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]!
-; LE-I64-NEON-NEXT:    ldr r1, [sp, #48] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r1
-; LE-I64-NEON-NEXT:    mov r1, r6
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #660
-; LE-I64-NEON-NEXT:    mov r11, r1
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r7
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #484
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r4
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #500
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r10
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #612
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r8
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #64] @ 4-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #96
-; LE-I64-NEON-NEXT:    add r8, r9, #128
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r7
-; LE-I64-NEON-NEXT:    ldr r2, [sp, #344]
-; LE-I64-NEON-NEXT:    ldr r3, [sp, #348]
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r11
-; LE-I64-NEON-NEXT:    ldr r7, [sp, #452]
-; LE-I64-NEON-NEXT:    ldr r10, [sp, #416]
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #336]
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r1
-; LE-I64-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #64
-; LE-I64-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; LE-I64-NEON-NEXT:    add lr, sp, #32
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #144
-; LE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; LE-I64-NEON-NEXT:    ldr r4, [sp, #340]
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r5:128]
-; LE-I64-NEON-NEXT:    mov r1, r4
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #80
-; LE-I64-NEON-NEXT:    vmov.32 d10[1], r6
-; LE-I64-NEON-NEXT:    ldr r6, [sp, #448]
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    ldr r2, [sp, #456]
-; LE-I64-NEON-NEXT:    mov r11, r1
-; LE-I64-NEON-NEXT:    ldr r3, [sp, #460]
-; LE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r6
-; LE-I64-NEON-NEXT:    mov r1, r7
-; LE-I64-NEON-NEXT:    ldr r5, [sp, #432]
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #468
-; LE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #464]
-; LE-I64-NEON-NEXT:    mov r6, r1
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #420
-; LE-I64-NEON-NEXT:    mov r7, r1
-; LE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r10
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #436
-; LE-I64-NEON-NEXT:    mov r4, r1
-; LE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; LE-I64-NEON-NEXT:    mov r0, r5
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add r3, sp, #324
-; LE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; LE-I64-NEON-NEXT:    ldr r0, [sp, #320]
-; LE-I64-NEON-NEXT:    mov r5, r1
-; LE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; LE-I64-NEON-NEXT:    bl lrintl
-; LE-I64-NEON-NEXT:    add lr, sp, #64
-; LE-I64-NEON-NEXT:    vmov.32 d9[1], r5
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #96
-; LE-I64-NEON-NEXT:    vmov.32 d13[1], r7
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #176
-; LE-I64-NEON-NEXT:    vmov.32 d8[1], r4
-; LE-I64-NEON-NEXT:    vmov.32 d12[1], r6
-; LE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; LE-I64-NEON-NEXT:    add r0, r9, #64
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]
-; LE-I64-NEON-NEXT:    vst1.64 {d8, d9}, [r0:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d12, d13}, [r0:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d10, d11}, [r0:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #160
-; LE-I64-NEON-NEXT:    vmov.32 d15[1], r11
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r0:128]
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #112
-; LE-I64-NEON-NEXT:    vmov.32 d14[1], r1
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]!
-; LE-I64-NEON-NEXT:    vst1.64 {d14, d15}, [r9:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    add lr, sp, #128
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]!
-; LE-I64-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; LE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]
-; LE-I64-NEON-NEXT:    add sp, sp, #192
-; LE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; LE-I64-NEON-NEXT:    add sp, sp, #4
-; LE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-I32-LABEL: lrint_v32fp128:
+; BE-I32-LABEL: lrint_v16fp128:
 ; BE-I32:       @ %bb.0:
 ; BE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; BE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -12000,261 +2965,126 @@ define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) {
 ; BE-I32-NEXT:    sub sp, sp, #4
 ; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEXT:    .pad #104
-; BE-I32-NEXT:    sub sp, sp, #104
-; BE-I32-NEXT:    mov r4, r3
-; BE-I32-NEXT:    add r3, sp, #248
-; BE-I32-NEXT:    mov r8, r2
-; BE-I32-NEXT:    mov r11, r0
-; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #616
-; BE-I32-NEXT:    mov r9, r0
-; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #680
-; BE-I32-NEXT:    mov r5, r0
+; BE-I32-NEXT:    .pad #16
+; BE-I32-NEXT:    sub sp, sp, #16
+; BE-I32-NEXT:    stm sp, {r0, r1, r2, r3} @ 16-byte Folded Spill
+; BE-I32-NEXT:    add r3, sp, #264
 ; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r7, [sp, #232]
-; BE-I32-NEXT:    add lr, sp, #72
-; BE-I32-NEXT:    ldr r1, [sp, #236]
-; BE-I32-NEXT:    vmov.32 d17[0], r0
-; BE-I32-NEXT:    ldr r2, [sp, #240]
-; BE-I32-NEXT:    ldr r3, [sp, #244]
-; BE-I32-NEXT:    mov r0, r7
-; BE-I32-NEXT:    ldr r10, [sp, #376]
-; BE-I32-NEXT:    vmov.32 d11[0], r5
-; BE-I32-NEXT:    ldr r6, [sp, #296]
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #300
-; BE-I32-NEXT:    vmov.32 d15[0], r0
-; BE-I32-NEXT:    mov r0, r6
-; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #380
-; BE-I32-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEXT:    mov r0, r10
+; BE-I32-NEXT:    add r3, sp, #332
+; BE-I32-NEXT:    ldr r7, [sp, #328]
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    ldr r10, [sp, #280]
 ; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEXT:    mov r0, r7
+; BE-I32-NEXT:    ldr r8, [sp, #168]
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #360
-; BE-I32-NEXT:    mov r5, r0
-; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    vmov.32 d17[0], r0
-; BE-I32-NEXT:    ldr r6, [sp, #312]
-; BE-I32-NEXT:    ldr r1, [sp, #316]
-; BE-I32-NEXT:    ldr r2, [sp, #320]
-; BE-I32-NEXT:    ldr r3, [sp, #324]
-; BE-I32-NEXT:    vmov.32 d17[1], r5
-; BE-I32-NEXT:    mov r0, r6
-; BE-I32-NEXT:    ldr r7, [sp, #572]
-; BE-I32-NEXT:    vorr q4, q8, q8
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r6, [sp, #632]
-; BE-I32-NEXT:    add lr, sp, #88
-; BE-I32-NEXT:    ldr r1, [sp, #636]
-; BE-I32-NEXT:    vmov.32 d13[1], r0
-; BE-I32-NEXT:    ldr r2, [sp, #640]
-; BE-I32-NEXT:    ldr r3, [sp, #644]
-; BE-I32-NEXT:    mov r0, r6
-; BE-I32-NEXT:    ldr r5, [sp, #576]
-; BE-I32-NEXT:    vmov.32 d15[1], r9
-; BE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
+; BE-I32-NEXT:    ldr r5, [sp, #344]
+; BE-I32-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEXT:    ldr r1, [sp, #348]
+; BE-I32-NEXT:    ldr r2, [sp, #352]
+; BE-I32-NEXT:    ldr r3, [sp, #356]
+; BE-I32-NEXT:    mov r0, r5
+; BE-I32-NEXT:    ldr r7, [sp, #284]
+; BE-I32-NEXT:    ldr r4, [sp, #288]
+; BE-I32-NEXT:    ldr r6, [sp, #172]
+; BE-I32-NEXT:    ldr r9, [sp, #176]
 ; BE-I32-NEXT:    bl lrintl
+; BE-I32-NEXT:    ldr r3, [sp, #292]
 ; BE-I32-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEXT:    ldr r3, [sp, #580]
-; BE-I32-NEXT:    ldr r0, [sp, #568]
+; BE-I32-NEXT:    mov r0, r10
 ; BE-I32-NEXT:    mov r1, r7
-; BE-I32-NEXT:    mov r2, r5
-; BE-I32-NEXT:    vorr q6, q5, q5
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #552
-; BE-I32-NEXT:    mov r9, r0
-; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #520
-; BE-I32-NEXT:    mov r5, r0
-; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r6, [sp, #584]
-; BE-I32-NEXT:    add lr, sp, #8
-; BE-I32-NEXT:    ldr r1, [sp, #588]
-; BE-I32-NEXT:    vmov.32 d16[0], r0
-; BE-I32-NEXT:    ldr r2, [sp, #592]
-; BE-I32-NEXT:    ldr r3, [sp, #596]
-; BE-I32-NEXT:    mov r0, r6
-; BE-I32-NEXT:    vmov.32 d17[0], r5
-; BE-I32-NEXT:    ldr r7, [sp, #216]
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #220
-; BE-I32-NEXT:    vmov.32 d12[0], r0
-; BE-I32-NEXT:    mov r0, r7
-; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I32-NEXT:    mov r2, r4
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r2, [sp, #208]
-; BE-I32-NEXT:    mov r7, r0
-; BE-I32-NEXT:    ldr r3, [sp, #212]
+; BE-I32-NEXT:    ldr r3, [sp, #180]
+; BE-I32-NEXT:    vmov.32 d9[1], r0
 ; BE-I32-NEXT:    mov r0, r8
-; BE-I32-NEXT:    mov r1, r4
+; BE-I32-NEXT:    mov r1, r6
+; BE-I32-NEXT:    mov r2, r9
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #456
-; BE-I32-NEXT:    mov r5, r0
+; BE-I32-NEXT:    add r3, sp, #232
+; BE-I32-NEXT:    mov r4, r0
 ; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r6, [sp, #328]
-; BE-I32-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEXT:    ldr r1, [sp, #332]
-; BE-I32-NEXT:    ldr r2, [sp, #336]
-; BE-I32-NEXT:    vmov.32 d14[0], r5
-; BE-I32-NEXT:    ldr r3, [sp, #340]
-; BE-I32-NEXT:    mov r0, r6
-; BE-I32-NEXT:    ldr r10, [sp, #504]
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r6, [sp, #344]
-; BE-I32-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEXT:    ldr r1, [sp, #348]
-; BE-I32-NEXT:    ldr r2, [sp, #352]
-; BE-I32-NEXT:    ldr r3, [sp, #356]
-; BE-I32-NEXT:    mov r0, r6
+; BE-I32-NEXT:    add r3, sp, #136
+; BE-I32-NEXT:    mov r6, r0
+; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEXT:    ldr r6, [sp, #600]
-; BE-I32-NEXT:    add lr, sp, #56
-; BE-I32-NEXT:    ldr r1, [sp, #604]
-; BE-I32-NEXT:    vmov.32 d14[1], r7
-; BE-I32-NEXT:    ldr r2, [sp, #608]
-; BE-I32-NEXT:    ldr r3, [sp, #612]
-; BE-I32-NEXT:    mov r0, r6
-; BE-I32-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #40
-; BE-I32-NEXT:    ldr r5, [sp, #508]
-; BE-I32-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; BE-I32-NEXT:    ldr r5, [sp, #296]
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    ldr r1, [sp, #300]
+; BE-I32-NEXT:    ldr r2, [sp, #304]
+; BE-I32-NEXT:    ldr r3, [sp, #308]
+; BE-I32-NEXT:    mov r0, r5
+; BE-I32-NEXT:    ldr r10, [sp, #216]
+; BE-I32-NEXT:    ldr r8, [sp, #220]
+; BE-I32-NEXT:    ldr r9, [sp, #152]
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    vmov.32 d12[1], r0
-; BE-I32-NEXT:    add lr, sp, #24
-; BE-I32-NEXT:    ldr r7, [sp, #536]
-; BE-I32-NEXT:    ldr r1, [sp, #540]
-; BE-I32-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I32-NEXT:    add lr, sp, #8
+; BE-I32-NEXT:    ldr r7, [sp, #248]
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    ldr r1, [sp, #252]
+; BE-I32-NEXT:    ldr r2, [sp, #256]
+; BE-I32-NEXT:    vmov.32 d8[0], r6
+; BE-I32-NEXT:    ldr r3, [sp, #260]
 ; BE-I32-NEXT:    mov r0, r7
-; BE-I32-NEXT:    ldr r2, [sp, #544]
-; BE-I32-NEXT:    ldr r3, [sp, #548]
-; BE-I32-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I32-NEXT:    ldr r6, [sp, #512]
-; BE-I32-NEXT:    vmov.32 d13[1], r9
+; BE-I32-NEXT:    ldr r5, [sp, #224]
+; BE-I32-NEXT:    ldr r11, [sp, #120]
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r3, [sp, #516]
-; BE-I32-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEXT:    ldr r3, [sp, #228]
+; BE-I32-NEXT:    vmov.32 d8[1], r0
 ; BE-I32-NEXT:    mov r0, r10
-; BE-I32-NEXT:    mov r1, r5
-; BE-I32-NEXT:    mov r2, r6
+; BE-I32-NEXT:    mov r1, r8
+; BE-I32-NEXT:    mov r2, r5
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #488
+; BE-I32-NEXT:    add r3, sp, #200
 ; BE-I32-NEXT:    mov r5, r0
 ; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #424
-; BE-I32-NEXT:    mov r7, r0
-; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r6, [sp, #264]
 ; BE-I32-NEXT:    vmov.32 d15[0], r0
-; BE-I32-NEXT:    ldr r1, [sp, #268]
-; BE-I32-NEXT:    ldr r2, [sp, #272]
-; BE-I32-NEXT:    vmov.32 d11[0], r7
-; BE-I32-NEXT:    ldr r3, [sp, #276]
-; BE-I32-NEXT:    mov r0, r6
-; BE-I32-NEXT:    ldr r8, [sp, #696]
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add lr, sp, #88
-; BE-I32-NEXT:    ldr r4, [sp, #472]
-; BE-I32-NEXT:    ldr r1, [sp, #476]
-; BE-I32-NEXT:    vmov.32 d11[1], r5
-; BE-I32-NEXT:    ldr r2, [sp, #480]
-; BE-I32-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEXT:    ldr r3, [sp, #484]
-; BE-I32-NEXT:    vmov.32 d16[0], r0
-; BE-I32-NEXT:    mov r0, r4
-; BE-I32-NEXT:    ldr r6, [sp, #700]
-; BE-I32-NEXT:    ldr r7, [sp, #704]
-; BE-I32-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    ldr r3, [sp, #708]
-; BE-I32-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEXT:    mov r0, r8
-; BE-I32-NEXT:    mov r1, r6
-; BE-I32-NEXT:    mov r2, r7
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #648
-; BE-I32-NEXT:    mov r4, r0
-; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add lr, sp, #72
-; BE-I32-NEXT:    ldr r5, [sp, #664]
-; BE-I32-NEXT:    ldr r1, [sp, #668]
-; BE-I32-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I32-NEXT:    ldr r2, [sp, #672]
-; BE-I32-NEXT:    ldr r3, [sp, #676]
-; BE-I32-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEXT:    mov r0, r5
-; BE-I32-NEXT:    ldr r6, [sp, #444]
-; BE-I32-NEXT:    vmov.32 d9[1], r4
-; BE-I32-NEXT:    ldr r7, [sp, #448]
-; BE-I32-NEXT:    ldr r8, [sp, #412]
-; BE-I32-NEXT:    ldr r4, [sp, #416]
+; BE-I32-NEXT:    ldr r0, [sp, #184]
+; BE-I32-NEXT:    ldr r1, [sp, #188]
+; BE-I32-NEXT:    ldr r2, [sp, #192]
+; BE-I32-NEXT:    vmov.32 d14[0], r4
+; BE-I32-NEXT:    ldr r3, [sp, #196]
+; BE-I32-NEXT:    vmov.32 d15[1], r5
+; BE-I32-NEXT:    ldr r7, [sp, #156]
+; BE-I32-NEXT:    ldr r6, [sp, #160]
+; BE-I32-NEXT:    ldr r4, [sp, #124]
+; BE-I32-NEXT:    ldr r5, [sp, #128]
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEXT:    ldr r3, [sp, #452]
-; BE-I32-NEXT:    ldr r0, [sp, #440]
-; BE-I32-NEXT:    mov r1, r6
-; BE-I32-NEXT:    mov r2, r7
+; BE-I32-NEXT:    ldr r3, [sp, #164]
+; BE-I32-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEXT:    mov r0, r9
+; BE-I32-NEXT:    mov r1, r7
+; BE-I32-NEXT:    mov r2, r6
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    vmov.32 d15[1], r0
-; BE-I32-NEXT:    ldr r3, [sp, #420]
-; BE-I32-NEXT:    ldr r0, [sp, #408]
-; BE-I32-NEXT:    mov r1, r8
-; BE-I32-NEXT:    mov r2, r4
+; BE-I32-NEXT:    ldr r3, [sp, #132]
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    mov r0, r11
+; BE-I32-NEXT:    mov r1, r4
+; BE-I32-NEXT:    mov r2, r5
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #392
 ; BE-I32-NEXT:    mov r4, r0
-; BE-I32-NEXT:    ldm r3, {r0, r1, r2, r3}
+; BE-I32-NEXT:    ldm sp, {r0, r1, r2, r3} @ 16-byte Folded Reload
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add r3, sp, #284
-; BE-I32-NEXT:    ldr r7, [sp, #280]
-; BE-I32-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEXT:    add r3, sp, #316
+; BE-I32-NEXT:    ldr r7, [sp, #312]
+; BE-I32-NEXT:    vmov.32 d12[0], r0
 ; BE-I32-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I32-NEXT:    mov r0, r7
-; BE-I32-NEXT:    vmov.32 d14[1], r4
+; BE-I32-NEXT:    vmov.32 d12[1], r4
 ; BE-I32-NEXT:    bl lrintl
-; BE-I32-NEXT:    add lr, sp, #88
-; BE-I32-NEXT:    vrev64.32 q9, q4
-; BE-I32-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; BE-I32-NEXT:    add lr, sp, #24
-; BE-I32-NEXT:    vrev64.32 q8, q7
-; BE-I32-NEXT:    vmov.32 d20[1], r0
-; BE-I32-NEXT:    add r0, r11, #64
-; BE-I32-NEXT:    vst1.32 {d10, d11}, [r0:128]!
-; BE-I32-NEXT:    vst1.32 {d12, d13}, [r0:128]!
-; BE-I32-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
-; BE-I32-NEXT:    add lr, sp, #40
-; BE-I32-NEXT:    vst1.32 {d22, d23}, [r0:128]!
-; BE-I32-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEXT:    add lr, sp, #56
-; BE-I32-NEXT:    vst1.32 {d18, d19}, [r11:128]!
-; BE-I32-NEXT:    vst1.32 {d20, d21}, [r11:128]!
-; BE-I32-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEXT:    vst1.32 {d18, d19}, [r11:128]!
-; BE-I32-NEXT:    vst1.64 {d16, d17}, [r11:128]
-; BE-I32-NEXT:    add sp, sp, #104
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q6
+; BE-I32-NEXT:    vrev64.32 q1, q7
+; BE-I32-NEXT:    vrev64.32 q2, q4
+; BE-I32-NEXT:    vrev64.32 q3, q5
+; BE-I32-NEXT:    add sp, sp, #16
 ; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-I32-NEXT:    add sp, sp, #4
 ; BE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
 ;
-; BE-I64-LABEL: lrint_v32fp128:
+; BE-I64-LABEL: lrint_v16fp128:
 ; BE-I64:       @ %bb.0:
 ; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
 ; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
@@ -12262,990 +3092,174 @@ define <32 x iXLen> @lrint_v32fp128(<32 x fp128> %x) {
 ; BE-I64-NEXT:    sub sp, sp, #4
 ; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEXT:    .pad #152
-; BE-I64-NEXT:    sub sp, sp, #152
-; BE-I64-NEXT:    str r3, [sp, #120] @ 4-byte Spill
-; BE-I64-NEXT:    add r3, sp, #712
-; BE-I64-NEXT:    str r2, [sp, #112] @ 4-byte Spill
-; BE-I64-NEXT:    mov r9, r0
+; BE-I64-NEXT:    .pad #56
+; BE-I64-NEXT:    sub sp, sp, #56
+; BE-I64-NEXT:    mov r5, r3
+; BE-I64-NEXT:    add r3, sp, #376
+; BE-I64-NEXT:    mov r6, r2
+; BE-I64-NEXT:    mov r4, r0
 ; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    ldr r7, [sp, #648]
-; BE-I64-NEXT:    add r3, sp, #652
-; BE-I64-NEXT:    mov r5, r1
-; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    ldr r7, [sp, #392]
+; BE-I64-NEXT:    add r3, sp, #396
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vmov.32 d8[0], r0
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    mov r0, r7
-; BE-I64-NEXT:    ldr r6, [sp, #520]
-; BE-I64-NEXT:    ldr r8, [sp, #632]
+; BE-I64-NEXT:    ldr r11, [sp, #168]
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #524
-; BE-I64-NEXT:    mov r7, r1
-; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    ldr r2, [sp, #160]
+; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    ldr r3, [sp, #164]
+; BE-I64-NEXT:    vmov.32 d9[0], r0
 ; BE-I64-NEXT:    mov r0, r6
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    mov r1, r5
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #636
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEXT:    mov r0, r8
+; BE-I64-NEXT:    add r3, sp, #172
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    mov r0, r11
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    add r3, sp, #220
 ; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #488]
-; BE-I64-NEXT:    vmov.32 d8[1], r4
-; BE-I64-NEXT:    ldr r1, [sp, #492]
-; BE-I64-NEXT:    ldr r2, [sp, #496]
-; BE-I64-NEXT:    vmov.32 d10[1], r7
-; BE-I64-NEXT:    ldr r3, [sp, #500]
-; BE-I64-NEXT:    vmov.32 d9[1], r5
-; BE-I64-NEXT:    vstr d8, [sp, #144] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #680
-; BE-I64-NEXT:    str r0, [sp, #104] @ 4-byte Spill
-; BE-I64-NEXT:    str r1, [sp, #88] @ 4-byte Spill
-; BE-I64-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #728]
-; BE-I64-NEXT:    ldr r2, [sp, #736]
-; BE-I64-NEXT:    vmov.32 d11[1], r6
-; BE-I64-NEXT:    ldr r6, [sp, #732]
-; BE-I64-NEXT:    ldr r3, [sp, #740]
-; BE-I64-NEXT:    vmov.32 d16[1], r1
-; BE-I64-NEXT:    ldr r5, [sp, #504]
-; BE-I64-NEXT:    mov r1, r6
-; BE-I64-NEXT:    ldr r7, [sp, #744]
-; BE-I64-NEXT:    ldr r4, [sp, #748]
-; BE-I64-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d16, [sp, #8] @ 8-byte Spill
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    ldr r2, [sp, #752]
+; BE-I64-NEXT:    ldr r0, [sp, #216]
 ; BE-I64-NEXT:    mov r11, r1
-; BE-I64-NEXT:    ldr r3, [sp, #756]
-; BE-I64-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEXT:    mov r0, r7
-; BE-I64-NEXT:    mov r1, r4
-; BE-I64-NEXT:    ldr r10, [sp, #552]
-; BE-I64-NEXT:    ldr r6, [sp, #664]
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #508
-; BE-I64-NEXT:    mov r8, r1
-; BE-I64-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEXT:    mov r0, r5
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #540
-; BE-I64-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #536]
-; BE-I64-NEXT:    mov r7, r1
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #556
-; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    add r3, sp, #236
 ; BE-I64-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEXT:    mov r0, r10
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #668
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEXT:    mov r0, r6
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #700
-; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #696]
+; BE-I64-NEXT:    ldr r0, [sp, #232]
 ; BE-I64-NEXT:    mov r6, r1
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
-; BE-I64-NEXT:    ldr r2, [sp, #256]
-; BE-I64-NEXT:    vmov.32 d13[1], r11
-; BE-I64-NEXT:    ldr r3, [sp, #260]
-; BE-I64-NEXT:    vmov.32 d14[1], r6
-; BE-I64-NEXT:    ldr r6, [sp, #264]
-; BE-I64-NEXT:    vmov.32 d9[1], r4
-; BE-I64-NEXT:    ldr r4, [sp, #344]
-; BE-I64-NEXT:    vmov.32 d12[1], r5
-; BE-I64-NEXT:    ldr r5, [sp, #312]
-; BE-I64-NEXT:    vmov.32 d8[1], r8
-; BE-I64-NEXT:    ldr r8, [sp, #328]
-; BE-I64-NEXT:    vmov.32 d10[1], r7
-; BE-I64-NEXT:    vstr d13, [sp, #32] @ 8-byte Spill
-; BE-I64-NEXT:    vmov.32 d11[1], r1
-; BE-I64-NEXT:    ldr r1, [sp, #120] @ 4-byte Reload
+; BE-I64-NEXT:    add r3, sp, #252
 ; BE-I64-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
-; BE-I64-NEXT:    vstr d14, [sp] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d9, [sp, #16] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d12, [sp, #56] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d10, [sp, #64] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d8, [sp, #40] @ 8-byte Spill
+; BE-I64-NEXT:    ldr r0, [sp, #248]
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
 ; BE-I64-NEXT:    add r3, sp, #268
-; BE-I64-NEXT:    mov r11, r1
 ; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    mov r0, r6
+; BE-I64-NEXT:    ldr r0, [sp, #264]
+; BE-I64-NEXT:    mov r5, r1
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #316
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #280]
+; BE-I64-NEXT:    ldr r2, [sp, #288]
+; BE-I64-NEXT:    vmov.32 d13[1], r7
+; BE-I64-NEXT:    ldr r7, [sp, #284]
+; BE-I64-NEXT:    ldr r3, [sp, #292]
+; BE-I64-NEXT:    vmov.32 d14[1], r5
+; BE-I64-NEXT:    ldr r5, [sp, #328]
+; BE-I64-NEXT:    vmov.32 d12[1], r6
+; BE-I64-NEXT:    ldr r6, [sp, #300]
+; BE-I64-NEXT:    vmov.32 d10[1], r8
+; BE-I64-NEXT:    ldr r8, [sp, #184]
+; BE-I64-NEXT:    vmov.32 d11[1], r11
+; BE-I64-NEXT:    vmov.32 d9[1], r10
+; BE-I64-NEXT:    vmov.32 d8[1], r9
+; BE-I64-NEXT:    vmov.32 d15[1], r1
+; BE-I64-NEXT:    mov r1, r7
+; BE-I64-NEXT:    vstr d14, [sp, #48] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d13, [sp, #40] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d12, [sp, #32] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d10, [sp, #16] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d9, [sp, #8] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d8, [sp] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintl
 ; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    ldr r1, [sp, #296]
+; BE-I64-NEXT:    ldr r2, [sp, #304]
 ; BE-I64-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEXT:    mov r0, r5
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
+; BE-I64-NEXT:    ldr r3, [sp, #308]
+; BE-I64-NEXT:    mov r0, r1
+; BE-I64-NEXT:    mov r1, r6
 ; BE-I64-NEXT:    bl lrintl
 ; BE-I64-NEXT:    add r3, sp, #332
-; BE-I64-NEXT:    mov r7, r1
-; BE-I64-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEXT:    mov r0, r8
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #348
-; BE-I64-NEXT:    mov r5, r1
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    mov r0, r4
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #364
+; BE-I64-NEXT:    mov r11, r1
 ; BE-I64-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #360]
-; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    mov r0, r5
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #476
+; BE-I64-NEXT:    add r3, sp, #188
+; BE-I64-NEXT:    mov r7, r1
 ; BE-I64-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #472]
-; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    mov r0, r8
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
-; BE-I64-NEXT:    ldr r2, [sp, #592]
-; BE-I64-NEXT:    vldr d20, [sp, #136] @ 8-byte Reload
-; BE-I64-NEXT:    vmov.32 d16[1], r1
-; BE-I64-NEXT:    ldr r1, [sp, #588]
-; BE-I64-NEXT:    ldr r3, [sp, #596]
-; BE-I64-NEXT:    vldr d22, [sp, #24] @ 8-byte Reload
-; BE-I64-NEXT:    vldr d18, [sp, #8] @ 8-byte Reload
-; BE-I64-NEXT:    vrev64.32 d21, d20
-; BE-I64-NEXT:    vmov.32 d10[1], r6
-; BE-I64-NEXT:    ldr r6, [sp, #600]
-; BE-I64-NEXT:    vmov.32 d9[1], r4
-; BE-I64-NEXT:    ldr r4, [sp, #616]
-; BE-I64-NEXT:    vmov.32 d12[1], r7
-; BE-I64-NEXT:    ldr r7, [sp, #604]
-; BE-I64-NEXT:    vmov.32 d8[1], r10
-; BE-I64-NEXT:    add r10, r9, #192
-; BE-I64-NEXT:    vmov.32 d14[1], r11
-; BE-I64-NEXT:    ldr r11, [sp, #440]
-; BE-I64-NEXT:    vmov.32 d13[1], r0
-; BE-I64-NEXT:    ldr r0, [sp, #584]
-; BE-I64-NEXT:    vmov.32 d15[1], r5
-; BE-I64-NEXT:    vstr d16, [sp, #48] @ 8-byte Spill
-; BE-I64-NEXT:    vldr d16, [sp, #128] @ 8-byte Reload
-; BE-I64-NEXT:    vrev64.32 d20, d22
-; BE-I64-NEXT:    vldr d22, [sp] @ 8-byte Reload
-; BE-I64-NEXT:    vrev64.32 d19, d18
-; BE-I64-NEXT:    vrev64.32 d17, d16
-; BE-I64-NEXT:    vrev64.32 d18, d22
-; BE-I64-NEXT:    vstr d10, [sp, #120] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d9, [sp, #112] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d15, [sp, #104] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d12, [sp, #96] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d8, [sp, #80] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d14, [sp, #72] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d13, [sp, #88] @ 8-byte Spill
-; BE-I64-NEXT:    vst1.64 {d20, d21}, [r10:128]!
-; BE-I64-NEXT:    vrev64.32 d16, d11
-; BE-I64-NEXT:    vst1.64 {d18, d19}, [r10:128]!
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r10:128]!
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    ldr r2, [sp, #608]
+; BE-I64-NEXT:    add r3, sp, #204
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #200]
 ; BE-I64-NEXT:    mov r8, r1
-; BE-I64-NEXT:    ldr r3, [sp, #612]
-; BE-I64-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEXT:    mov r0, r6
-; BE-I64-NEXT:    mov r1, r7
-; BE-I64-NEXT:    ldr r5, [sp, #456]
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #620
-; BE-I64-NEXT:    mov r6, r1
-; BE-I64-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEXT:    mov r0, r4
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #444
-; BE-I64-NEXT:    mov r7, r1
-; BE-I64-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEXT:    mov r0, r11
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #460
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEXT:    mov r0, r5
+; BE-I64-NEXT:    add r3, sp, #348
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #344]
+; BE-I64-NEXT:    mov r5, r1
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #572
+; BE-I64-NEXT:    add r3, sp, #364
 ; BE-I64-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #568]
-; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    ldr r0, [sp, #360]
+; BE-I64-NEXT:    mov r9, r1
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    vldr d16, [sp, #16] @ 8-byte Reload
-; BE-I64-NEXT:    vldr d18, [sp, #56] @ 8-byte Reload
-; BE-I64-NEXT:    vrev64.32 d17, d16
-; BE-I64-NEXT:    ldr r2, [sp, #304]
-; BE-I64-NEXT:    vrev64.32 d16, d18
-; BE-I64-NEXT:    ldr r3, [sp, #308]
-; BE-I64-NEXT:    vldr d18, [sp, #144] @ 8-byte Reload
-; BE-I64-NEXT:    vldr d20, [sp, #64] @ 8-byte Reload
-; BE-I64-NEXT:    vrev64.32 d19, d18
-; BE-I64-NEXT:    vrev64.32 d18, d20
-; BE-I64-NEXT:    vldr d20, [sp, #40] @ 8-byte Reload
-; BE-I64-NEXT:    vldr d22, [sp, #32] @ 8-byte Reload
+; BE-I64-NEXT:    add r3, sp, #316
 ; BE-I64-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #296]
-; BE-I64-NEXT:    vmov.32 d10[1], r7
-; BE-I64-NEXT:    ldr r7, [sp, #412]
-; BE-I64-NEXT:    vmov.32 d9[1], r6
-; BE-I64-NEXT:    ldr r6, [sp, #408]
-; BE-I64-NEXT:    vmov.32 d8[1], r8
-; BE-I64-NEXT:    add r8, r9, #128
-; BE-I64-NEXT:    vrev64.32 d21, d20
-; BE-I64-NEXT:    vmov.32 d13[1], r5
-; BE-I64-NEXT:    ldr r5, [sp, #300]
-; BE-I64-NEXT:    vrev64.32 d20, d22
-; BE-I64-NEXT:    vmov.32 d14[1], r1
-; BE-I64-NEXT:    mov r1, r5
-; BE-I64-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
-; BE-I64-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
-; BE-I64-NEXT:    vst1.64 {d20, d21}, [r10:128]
-; BE-I64-NEXT:    vst1.64 {d18, d19}, [r8:128]!
-; BE-I64-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEXT:    ldr r4, [sp, #424]
-; BE-I64-NEXT:    ldr r10, [sp, #376]
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    ldr r2, [sp, #416]
-; BE-I64-NEXT:    mov r11, r1
-; BE-I64-NEXT:    ldr r3, [sp, #420]
-; BE-I64-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEXT:    mov r0, r6
-; BE-I64-NEXT:    mov r1, r7
-; BE-I64-NEXT:    ldr r5, [sp, #392]
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #428
+; BE-I64-NEXT:    ldr r0, [sp, #312]
 ; BE-I64-NEXT:    mov r6, r1
-; BE-I64-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEXT:    mov r0, r4
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #380
-; BE-I64-NEXT:    mov r7, r1
-; BE-I64-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEXT:    mov r0, r10
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #396
-; BE-I64-NEXT:    mov r4, r1
-; BE-I64-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEXT:    mov r0, r5
-; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    add r3, sp, #284
-; BE-I64-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEXT:    ldr r0, [sp, #280]
-; BE-I64-NEXT:    mov r5, r1
 ; BE-I64-NEXT:    ldm r3, {r1, r2, r3}
 ; BE-I64-NEXT:    bl lrintl
-; BE-I64-NEXT:    vldr d16, [sp, #120] @ 8-byte Reload
-; BE-I64-NEXT:    vldr d18, [sp, #112] @ 8-byte Reload
-; BE-I64-NEXT:    vrev64.32 d17, d16
-; BE-I64-NEXT:    vldr d26, [sp, #136] @ 8-byte Reload
+; BE-I64-NEXT:    vldr d18, [sp, #48] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d17, d15
 ; BE-I64-NEXT:    vrev64.32 d16, d18
-; BE-I64-NEXT:    vldr d18, [sp, #104] @ 8-byte Reload
-; BE-I64-NEXT:    vrev64.32 d31, d26
-; BE-I64-NEXT:    vldr d26, [sp, #128] @ 8-byte Reload
-; BE-I64-NEXT:    vldr d20, [sp, #96] @ 8-byte Reload
+; BE-I64-NEXT:    vldr d18, [sp, #40] @ 8-byte Reload
+; BE-I64-NEXT:    vmov.32 d24[0], r0
+; BE-I64-NEXT:    add r0, r4, #64
+; BE-I64-NEXT:    vldr d20, [sp, #32] @ 8-byte Reload
 ; BE-I64-NEXT:    vrev64.32 d19, d18
+; BE-I64-NEXT:    vmov.32 d9[1], r11
+; BE-I64-NEXT:    vmov.32 d10[1], r7
 ; BE-I64-NEXT:    vrev64.32 d18, d20
-; BE-I64-NEXT:    vldr d20, [sp, #80] @ 8-byte Reload
-; BE-I64-NEXT:    vrev64.32 d30, d26
-; BE-I64-NEXT:    vldr d26, [sp, #24] @ 8-byte Reload
-; BE-I64-NEXT:    vmov.32 d10[1], r5
-; BE-I64-NEXT:    vldr d22, [sp, #72] @ 8-byte Reload
+; BE-I64-NEXT:    vldr d20, [sp, #24] @ 8-byte Reload
+; BE-I64-NEXT:    vmov.32 d8[1], r10
+; BE-I64-NEXT:    vmov.32 d14[1], r6
+; BE-I64-NEXT:    vmov.32 d24[1], r1
+; BE-I64-NEXT:    vldr d22, [sp, #16] @ 8-byte Reload
 ; BE-I64-NEXT:    vrev64.32 d21, d20
-; BE-I64-NEXT:    vrev64.32 d1, d26
-; BE-I64-NEXT:    vmov.32 d9[1], r7
-; BE-I64-NEXT:    vmov.32 d12[1], r4
+; BE-I64-NEXT:    vrev64.32 d1, d9
+; BE-I64-NEXT:    vmov.32 d13[1], r9
+; BE-I64-NEXT:    vrev64.32 d31, d10
 ; BE-I64-NEXT:    vrev64.32 d20, d22
-; BE-I64-NEXT:    vldr d22, [sp, #88] @ 8-byte Reload
-; BE-I64-NEXT:    vmov.32 d8[1], r6
-; BE-I64-NEXT:    vrev64.32 d0, d14
-; BE-I64-NEXT:    vmov.32 d28[0], r0
-; BE-I64-NEXT:    add r0, r9, #64
-; BE-I64-NEXT:    vrev64.32 d3, d10
-; BE-I64-NEXT:    vldr d24, [sp, #48] @ 8-byte Reload
-; BE-I64-NEXT:    vrev64.32 d23, d22
-; BE-I64-NEXT:    vrev64.32 d5, d9
-; BE-I64-NEXT:    vst1.64 {d0, d1}, [r8:128]!
-; BE-I64-NEXT:    vrev64.32 d2, d12
-; BE-I64-NEXT:    vmov.32 d15[1], r11
-; BE-I64-NEXT:    vrev64.32 d22, d24
-; BE-I64-NEXT:    vrev64.32 d25, d13
-; BE-I64-NEXT:    vrev64.32 d4, d8
-; BE-I64-NEXT:    vst1.64 {d30, d31}, [r8:128]
-; BE-I64-NEXT:    vst1.64 {d2, d3}, [r0:128]!
-; BE-I64-NEXT:    vmov.32 d28[1], r1
-; BE-I64-NEXT:    vrev64.32 d24, d11
-; BE-I64-NEXT:    vst1.64 {d4, d5}, [r0:128]!
-; BE-I64-NEXT:    vrev64.32 d27, d15
-; BE-I64-NEXT:    vst1.64 {d24, d25}, [r0:128]!
-; BE-I64-NEXT:    vrev64.32 d26, d28
-; BE-I64-NEXT:    vst1.64 {d22, d23}, [r0:128]
-; BE-I64-NEXT:    vst1.64 {d20, d21}, [r9:128]!
-; BE-I64-NEXT:    vst1.64 {d26, d27}, [r9:128]!
-; BE-I64-NEXT:    vst1.64 {d18, d19}, [r9:128]!
-; BE-I64-NEXT:    vst1.64 {d16, d17}, [r9:128]
-; BE-I64-NEXT:    add sp, sp, #152
+; BE-I64-NEXT:    vldr d22, [sp, #8] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d0, d8
+; BE-I64-NEXT:    vrev64.32 d29, d14
+; BE-I64-NEXT:    vmov.32 d12[1], r5
+; BE-I64-NEXT:    vrev64.32 d30, d24
+; BE-I64-NEXT:    vrev64.32 d27, d22
+; BE-I64-NEXT:    vldr d22, [sp] @ 8-byte Reload
+; BE-I64-NEXT:    vst1.64 {d0, d1}, [r0:128]!
+; BE-I64-NEXT:    vmov.32 d11[1], r8
+; BE-I64-NEXT:    vrev64.32 d28, d13
+; BE-I64-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 d26, d22
+; BE-I64-NEXT:    vrev64.32 d23, d12
+; BE-I64-NEXT:    vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 d22, d11
+; BE-I64-NEXT:    vst1.64 {d26, d27}, [r0:128]
+; BE-I64-NEXT:    vst1.64 {d20, d21}, [r4:128]!
+; BE-I64-NEXT:    vst1.64 {d22, d23}, [r4:128]!
+; BE-I64-NEXT:    vst1.64 {d18, d19}, [r4:128]!
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r4:128]
+; BE-I64-NEXT:    add sp, sp, #56
 ; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; BE-I64-NEXT:    add sp, sp, #4
 ; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-I32-NEON-LABEL: lrint_v32fp128:
-; BE-I32-NEON:       @ %bb.0:
-; BE-I32-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I32-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I32-NEON-NEXT:    .pad #4
-; BE-I32-NEON-NEXT:    sub sp, sp, #4
-; BE-I32-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    .pad #104
-; BE-I32-NEON-NEXT:    sub sp, sp, #104
-; BE-I32-NEON-NEXT:    mov r4, r3
-; BE-I32-NEON-NEXT:    add r3, sp, #248
-; BE-I32-NEON-NEXT:    mov r8, r2
-; BE-I32-NEON-NEXT:    mov r11, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #616
-; BE-I32-NEON-NEXT:    mov r9, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #680
-; BE-I32-NEON-NEXT:    mov r5, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r7, [sp, #232]
-; BE-I32-NEON-NEXT:    add lr, sp, #72
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #236]
-; BE-I32-NEON-NEXT:    vmov.32 d17[0], r0
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #240]
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #244]
-; BE-I32-NEON-NEXT:    mov r0, r7
-; BE-I32-NEON-NEXT:    ldr r10, [sp, #376]
-; BE-I32-NEON-NEXT:    vmov.32 d11[0], r5
-; BE-I32-NEON-NEXT:    ldr r6, [sp, #296]
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #300
-; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I32-NEON-NEXT:    mov r0, r6
-; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #380
-; BE-I32-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I32-NEON-NEXT:    mov r0, r10
-; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #360
-; BE-I32-NEON-NEXT:    mov r5, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    vmov.32 d17[0], r0
-; BE-I32-NEON-NEXT:    ldr r6, [sp, #312]
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #316]
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #320]
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #324]
-; BE-I32-NEON-NEXT:    vmov.32 d17[1], r5
-; BE-I32-NEON-NEXT:    mov r0, r6
-; BE-I32-NEON-NEXT:    ldr r7, [sp, #572]
-; BE-I32-NEON-NEXT:    vorr q4, q8, q8
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r6, [sp, #632]
-; BE-I32-NEON-NEXT:    add lr, sp, #88
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #636]
-; BE-I32-NEON-NEXT:    vmov.32 d13[1], r0
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #640]
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #644]
-; BE-I32-NEON-NEXT:    mov r0, r6
-; BE-I32-NEON-NEXT:    ldr r5, [sp, #576]
-; BE-I32-NEON-NEXT:    vmov.32 d15[1], r9
-; BE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    vmov.32 d11[1], r0
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #580]
-; BE-I32-NEON-NEXT:    ldr r0, [sp, #568]
-; BE-I32-NEON-NEXT:    mov r1, r7
-; BE-I32-NEON-NEXT:    mov r2, r5
-; BE-I32-NEON-NEXT:    vorr q6, q5, q5
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #552
-; BE-I32-NEON-NEXT:    mov r9, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #520
-; BE-I32-NEON-NEXT:    mov r5, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r6, [sp, #584]
-; BE-I32-NEON-NEXT:    add lr, sp, #8
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #588]
-; BE-I32-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #592]
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #596]
-; BE-I32-NEON-NEXT:    mov r0, r6
-; BE-I32-NEON-NEXT:    vmov.32 d17[0], r5
-; BE-I32-NEON-NEXT:    ldr r7, [sp, #216]
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #220
-; BE-I32-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I32-NEON-NEXT:    mov r0, r7
-; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #208]
-; BE-I32-NEON-NEXT:    mov r7, r0
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #212]
-; BE-I32-NEON-NEXT:    mov r0, r8
-; BE-I32-NEON-NEXT:    mov r1, r4
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #456
-; BE-I32-NEON-NEXT:    mov r5, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r6, [sp, #328]
-; BE-I32-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #332]
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #336]
-; BE-I32-NEON-NEXT:    vmov.32 d14[0], r5
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #340]
-; BE-I32-NEON-NEXT:    mov r0, r6
-; BE-I32-NEON-NEXT:    ldr r10, [sp, #504]
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r6, [sp, #344]
-; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #348]
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #352]
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #356]
-; BE-I32-NEON-NEXT:    mov r0, r6
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEON-NEXT:    ldr r6, [sp, #600]
-; BE-I32-NEON-NEXT:    add lr, sp, #56
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #604]
-; BE-I32-NEON-NEXT:    vmov.32 d14[1], r7
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #608]
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #612]
-; BE-I32-NEON-NEXT:    mov r0, r6
-; BE-I32-NEON-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #40
-; BE-I32-NEON-NEXT:    ldr r5, [sp, #508]
-; BE-I32-NEON-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    vmov.32 d12[1], r0
-; BE-I32-NEON-NEXT:    add lr, sp, #24
-; BE-I32-NEON-NEXT:    ldr r7, [sp, #536]
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #540]
-; BE-I32-NEON-NEXT:    vstmia lr, {d12, d13} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    add lr, sp, #8
-; BE-I32-NEON-NEXT:    mov r0, r7
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #544]
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #548]
-; BE-I32-NEON-NEXT:    vldmia lr, {d12, d13} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    ldr r6, [sp, #512]
-; BE-I32-NEON-NEXT:    vmov.32 d13[1], r9
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #516]
-; BE-I32-NEON-NEXT:    vmov.32 d12[1], r0
-; BE-I32-NEON-NEXT:    mov r0, r10
-; BE-I32-NEON-NEXT:    mov r1, r5
-; BE-I32-NEON-NEXT:    mov r2, r6
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #488
-; BE-I32-NEON-NEXT:    mov r5, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #424
-; BE-I32-NEON-NEXT:    mov r7, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r6, [sp, #264]
-; BE-I32-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #268]
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #272]
-; BE-I32-NEON-NEXT:    vmov.32 d11[0], r7
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #276]
-; BE-I32-NEON-NEXT:    mov r0, r6
-; BE-I32-NEON-NEXT:    ldr r8, [sp, #696]
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add lr, sp, #88
-; BE-I32-NEON-NEXT:    ldr r4, [sp, #472]
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #476]
-; BE-I32-NEON-NEXT:    vmov.32 d11[1], r5
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #480]
-; BE-I32-NEON-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #484]
-; BE-I32-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-I32-NEON-NEXT:    mov r0, r4
-; BE-I32-NEON-NEXT:    ldr r6, [sp, #700]
-; BE-I32-NEON-NEXT:    ldr r7, [sp, #704]
-; BE-I32-NEON-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #708]
-; BE-I32-NEON-NEXT:    vmov.32 d10[1], r0
-; BE-I32-NEON-NEXT:    mov r0, r8
-; BE-I32-NEON-NEXT:    mov r1, r6
-; BE-I32-NEON-NEXT:    mov r2, r7
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #648
-; BE-I32-NEON-NEXT:    mov r4, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add lr, sp, #72
-; BE-I32-NEON-NEXT:    ldr r5, [sp, #664]
-; BE-I32-NEON-NEXT:    ldr r1, [sp, #668]
-; BE-I32-NEON-NEXT:    vldmia lr, {d8, d9} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    ldr r2, [sp, #672]
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #676]
-; BE-I32-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I32-NEON-NEXT:    mov r0, r5
-; BE-I32-NEON-NEXT:    ldr r6, [sp, #444]
-; BE-I32-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-I32-NEON-NEXT:    ldr r7, [sp, #448]
-; BE-I32-NEON-NEXT:    ldr r8, [sp, #412]
-; BE-I32-NEON-NEXT:    ldr r4, [sp, #416]
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    vmov.32 d8[1], r0
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #452]
-; BE-I32-NEON-NEXT:    ldr r0, [sp, #440]
-; BE-I32-NEON-NEXT:    mov r1, r6
-; BE-I32-NEON-NEXT:    mov r2, r7
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    vmov.32 d15[1], r0
-; BE-I32-NEON-NEXT:    ldr r3, [sp, #420]
-; BE-I32-NEON-NEXT:    ldr r0, [sp, #408]
-; BE-I32-NEON-NEXT:    mov r1, r8
-; BE-I32-NEON-NEXT:    mov r2, r4
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #392
-; BE-I32-NEON-NEXT:    mov r4, r0
-; BE-I32-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add r3, sp, #284
-; BE-I32-NEON-NEXT:    ldr r7, [sp, #280]
-; BE-I32-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I32-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I32-NEON-NEXT:    mov r0, r7
-; BE-I32-NEON-NEXT:    vmov.32 d14[1], r4
-; BE-I32-NEON-NEXT:    bl lrintl
-; BE-I32-NEON-NEXT:    add lr, sp, #88
-; BE-I32-NEON-NEXT:    vrev64.32 q9, q4
-; BE-I32-NEON-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    add lr, sp, #24
-; BE-I32-NEON-NEXT:    vrev64.32 q8, q7
-; BE-I32-NEON-NEXT:    vmov.32 d20[1], r0
-; BE-I32-NEON-NEXT:    add r0, r11, #64
-; BE-I32-NEON-NEXT:    vst1.32 {d10, d11}, [r0:128]!
-; BE-I32-NEON-NEXT:    vst1.32 {d12, d13}, [r0:128]!
-; BE-I32-NEON-NEXT:    vldmia lr, {d22, d23} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    add lr, sp, #40
-; BE-I32-NEON-NEXT:    vst1.32 {d22, d23}, [r0:128]!
-; BE-I32-NEON-NEXT:    vst1.64 {d18, d19}, [r0:128]
-; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    add lr, sp, #56
-; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r11:128]!
-; BE-I32-NEON-NEXT:    vst1.32 {d20, d21}, [r11:128]!
-; BE-I32-NEON-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
-; BE-I32-NEON-NEXT:    vst1.32 {d18, d19}, [r11:128]!
-; BE-I32-NEON-NEXT:    vst1.64 {d16, d17}, [r11:128]
-; BE-I32-NEON-NEXT:    add sp, sp, #104
-; BE-I32-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I32-NEON-NEXT:    add sp, sp, #4
-; BE-I32-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-;
-; BE-I64-NEON-LABEL: lrint_v32fp128:
-; BE-I64-NEON:       @ %bb.0:
-; BE-I64-NEON-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEON-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
-; BE-I64-NEON-NEXT:    .pad #4
-; BE-I64-NEON-NEXT:    sub sp, sp, #4
-; BE-I64-NEON-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    .pad #152
-; BE-I64-NEON-NEXT:    sub sp, sp, #152
-; BE-I64-NEON-NEXT:    str r3, [sp, #120] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    add r3, sp, #712
-; BE-I64-NEON-NEXT:    str r2, [sp, #112] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    mov r9, r0
-; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    ldr r7, [sp, #648]
-; BE-I64-NEON-NEXT:    add r3, sp, #652
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    mov r0, r7
-; BE-I64-NEON-NEXT:    ldr r6, [sp, #520]
-; BE-I64-NEON-NEXT:    ldr r8, [sp, #632]
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #524
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r6
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #636
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r8
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #488]
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r4
-; BE-I64-NEON-NEXT:    ldr r1, [sp, #492]
-; BE-I64-NEON-NEXT:    ldr r2, [sp, #496]
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r7
-; BE-I64-NEON-NEXT:    ldr r3, [sp, #500]
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r5
-; BE-I64-NEON-NEXT:    vstr d8, [sp, #144] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #680
-; BE-I64-NEON-NEXT:    str r0, [sp, #104] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    str r1, [sp, #88] @ 4-byte Spill
-; BE-I64-NEON-NEXT:    ldm r3, {r0, r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #728]
-; BE-I64-NEON-NEXT:    ldr r2, [sp, #736]
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r6
-; BE-I64-NEON-NEXT:    ldr r6, [sp, #732]
-; BE-I64-NEON-NEXT:    ldr r3, [sp, #740]
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-I64-NEON-NEXT:    ldr r5, [sp, #504]
-; BE-I64-NEON-NEXT:    mov r1, r6
-; BE-I64-NEON-NEXT:    ldr r7, [sp, #744]
-; BE-I64-NEON-NEXT:    ldr r4, [sp, #748]
-; BE-I64-NEON-NEXT:    vstr d11, [sp, #24] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d16, [sp, #8] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    ldr r2, [sp, #752]
-; BE-I64-NEON-NEXT:    mov r11, r1
-; BE-I64-NEON-NEXT:    ldr r3, [sp, #756]
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r7
-; BE-I64-NEON-NEXT:    mov r1, r4
-; BE-I64-NEON-NEXT:    ldr r10, [sp, #552]
-; BE-I64-NEON-NEXT:    ldr r6, [sp, #664]
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #508
-; BE-I64-NEON-NEXT:    mov r8, r1
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r5
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #540
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #536]
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #556
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r10
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #668
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r6
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #700
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #696]
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #104] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    ldr r2, [sp, #256]
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r11
-; BE-I64-NEON-NEXT:    ldr r3, [sp, #260]
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r6
-; BE-I64-NEON-NEXT:    ldr r6, [sp, #264]
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-I64-NEON-NEXT:    ldr r4, [sp, #344]
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r5
-; BE-I64-NEON-NEXT:    ldr r5, [sp, #312]
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r8
-; BE-I64-NEON-NEXT:    ldr r8, [sp, #328]
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r7
-; BE-I64-NEON-NEXT:    vstr d13, [sp, #32] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r1
-; BE-I64-NEON-NEXT:    ldr r1, [sp, #120] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    vstr d14, [sp] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d9, [sp, #16] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d12, [sp, #56] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d10, [sp, #64] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d8, [sp, #40] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #268
-; BE-I64-NEON-NEXT:    mov r11, r1
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r6
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #316
-; BE-I64-NEON-NEXT:    mov r10, r1
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r5
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #332
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r8
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #348
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r4
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #364
-; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #360]
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #476
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #472]
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    vmov.32 d16[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #88] @ 4-byte Reload
-; BE-I64-NEON-NEXT:    ldr r2, [sp, #592]
-; BE-I64-NEON-NEXT:    vldr d20, [sp, #136] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d16[1], r1
-; BE-I64-NEON-NEXT:    ldr r1, [sp, #588]
-; BE-I64-NEON-NEXT:    ldr r3, [sp, #596]
-; BE-I64-NEON-NEXT:    vldr d22, [sp, #24] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vldr d18, [sp, #8] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d21, d20
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r6
-; BE-I64-NEON-NEXT:    ldr r6, [sp, #600]
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r4
-; BE-I64-NEON-NEXT:    ldr r4, [sp, #616]
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r7
-; BE-I64-NEON-NEXT:    ldr r7, [sp, #604]
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r10
-; BE-I64-NEON-NEXT:    add r10, r9, #192
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r11
-; BE-I64-NEON-NEXT:    ldr r11, [sp, #440]
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #584]
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r5
-; BE-I64-NEON-NEXT:    vstr d16, [sp, #48] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vldr d16, [sp, #128] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d20, d22
-; BE-I64-NEON-NEXT:    vldr d22, [sp] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d19, d18
-; BE-I64-NEON-NEXT:    vrev64.32 d17, d16
-; BE-I64-NEON-NEXT:    vrev64.32 d18, d22
-; BE-I64-NEON-NEXT:    vstr d10, [sp, #120] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d9, [sp, #112] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d15, [sp, #104] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d12, [sp, #96] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d8, [sp, #80] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d14, [sp, #72] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d13, [sp, #88] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r10:128]!
-; BE-I64-NEON-NEXT:    vrev64.32 d16, d11
-; BE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r10:128]!
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r10:128]!
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    ldr r2, [sp, #608]
-; BE-I64-NEON-NEXT:    mov r8, r1
-; BE-I64-NEON-NEXT:    ldr r3, [sp, #612]
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r6
-; BE-I64-NEON-NEXT:    mov r1, r7
-; BE-I64-NEON-NEXT:    ldr r5, [sp, #456]
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #620
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r4
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #444
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r11
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #460
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d11[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r5
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #572
-; BE-I64-NEON-NEXT:    vmov.32 d13[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #568]
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    vldr d16, [sp, #16] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vldr d18, [sp, #56] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d17, d16
-; BE-I64-NEON-NEXT:    ldr r2, [sp, #304]
-; BE-I64-NEON-NEXT:    vrev64.32 d16, d18
-; BE-I64-NEON-NEXT:    ldr r3, [sp, #308]
-; BE-I64-NEON-NEXT:    vldr d18, [sp, #144] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vldr d20, [sp, #64] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d19, d18
-; BE-I64-NEON-NEXT:    vrev64.32 d18, d20
-; BE-I64-NEON-NEXT:    vldr d20, [sp, #40] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vldr d22, [sp, #32] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d14[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #296]
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r7
-; BE-I64-NEON-NEXT:    ldr r7, [sp, #412]
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r6
-; BE-I64-NEON-NEXT:    ldr r6, [sp, #408]
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r8
-; BE-I64-NEON-NEXT:    add r8, r9, #128
-; BE-I64-NEON-NEXT:    vrev64.32 d21, d20
-; BE-I64-NEON-NEXT:    vmov.32 d13[1], r5
-; BE-I64-NEON-NEXT:    ldr r5, [sp, #300]
-; BE-I64-NEON-NEXT:    vrev64.32 d20, d22
-; BE-I64-NEON-NEXT:    vmov.32 d14[1], r1
-; BE-I64-NEON-NEXT:    mov r1, r5
-; BE-I64-NEON-NEXT:    vstr d10, [sp, #136] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d9, [sp, #128] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vstr d8, [sp, #24] @ 8-byte Spill
-; BE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r10:128]
-; BE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r8:128]!
-; BE-I64-NEON-NEXT:    vmov.32 d11[1], r4
-; BE-I64-NEON-NEXT:    ldr r4, [sp, #424]
-; BE-I64-NEON-NEXT:    ldr r10, [sp, #376]
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r8:128]!
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    ldr r2, [sp, #416]
-; BE-I64-NEON-NEXT:    mov r11, r1
-; BE-I64-NEON-NEXT:    ldr r3, [sp, #420]
-; BE-I64-NEON-NEXT:    vmov.32 d15[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r6
-; BE-I64-NEON-NEXT:    mov r1, r7
-; BE-I64-NEON-NEXT:    ldr r5, [sp, #392]
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #428
-; BE-I64-NEON-NEXT:    mov r6, r1
-; BE-I64-NEON-NEXT:    vmov.32 d8[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r4
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #380
-; BE-I64-NEON-NEXT:    mov r7, r1
-; BE-I64-NEON-NEXT:    vmov.32 d9[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r10
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #396
-; BE-I64-NEON-NEXT:    mov r4, r1
-; BE-I64-NEON-NEXT:    vmov.32 d12[0], r0
-; BE-I64-NEON-NEXT:    mov r0, r5
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    add r3, sp, #284
-; BE-I64-NEON-NEXT:    vmov.32 d10[0], r0
-; BE-I64-NEON-NEXT:    ldr r0, [sp, #280]
-; BE-I64-NEON-NEXT:    mov r5, r1
-; BE-I64-NEON-NEXT:    ldm r3, {r1, r2, r3}
-; BE-I64-NEON-NEXT:    bl lrintl
-; BE-I64-NEON-NEXT:    vldr d16, [sp, #120] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vldr d18, [sp, #112] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d17, d16
-; BE-I64-NEON-NEXT:    vldr d26, [sp, #136] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d16, d18
-; BE-I64-NEON-NEXT:    vldr d18, [sp, #104] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d31, d26
-; BE-I64-NEON-NEXT:    vldr d26, [sp, #128] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vldr d20, [sp, #96] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d19, d18
-; BE-I64-NEON-NEXT:    vrev64.32 d18, d20
-; BE-I64-NEON-NEXT:    vldr d20, [sp, #80] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d30, d26
-; BE-I64-NEON-NEXT:    vldr d26, [sp, #24] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d10[1], r5
-; BE-I64-NEON-NEXT:    vldr d22, [sp, #72] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d21, d20
-; BE-I64-NEON-NEXT:    vrev64.32 d1, d26
-; BE-I64-NEON-NEXT:    vmov.32 d9[1], r7
-; BE-I64-NEON-NEXT:    vmov.32 d12[1], r4
-; BE-I64-NEON-NEXT:    vrev64.32 d20, d22
-; BE-I64-NEON-NEXT:    vldr d22, [sp, #88] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vmov.32 d8[1], r6
-; BE-I64-NEON-NEXT:    vrev64.32 d0, d14
-; BE-I64-NEON-NEXT:    vmov.32 d28[0], r0
-; BE-I64-NEON-NEXT:    add r0, r9, #64
-; BE-I64-NEON-NEXT:    vrev64.32 d3, d10
-; BE-I64-NEON-NEXT:    vldr d24, [sp, #48] @ 8-byte Reload
-; BE-I64-NEON-NEXT:    vrev64.32 d23, d22
-; BE-I64-NEON-NEXT:    vrev64.32 d5, d9
-; BE-I64-NEON-NEXT:    vst1.64 {d0, d1}, [r8:128]!
-; BE-I64-NEON-NEXT:    vrev64.32 d2, d12
-; BE-I64-NEON-NEXT:    vmov.32 d15[1], r11
-; BE-I64-NEON-NEXT:    vrev64.32 d22, d24
-; BE-I64-NEON-NEXT:    vrev64.32 d25, d13
-; BE-I64-NEON-NEXT:    vrev64.32 d4, d8
-; BE-I64-NEON-NEXT:    vst1.64 {d30, d31}, [r8:128]
-; BE-I64-NEON-NEXT:    vst1.64 {d2, d3}, [r0:128]!
-; BE-I64-NEON-NEXT:    vmov.32 d28[1], r1
-; BE-I64-NEON-NEXT:    vrev64.32 d24, d11
-; BE-I64-NEON-NEXT:    vst1.64 {d4, d5}, [r0:128]!
-; BE-I64-NEON-NEXT:    vrev64.32 d27, d15
-; BE-I64-NEON-NEXT:    vst1.64 {d24, d25}, [r0:128]!
-; BE-I64-NEON-NEXT:    vrev64.32 d26, d28
-; BE-I64-NEON-NEXT:    vst1.64 {d22, d23}, [r0:128]
-; BE-I64-NEON-NEXT:    vst1.64 {d20, d21}, [r9:128]!
-; BE-I64-NEON-NEXT:    vst1.64 {d26, d27}, [r9:128]!
-; BE-I64-NEON-NEXT:    vst1.64 {d18, d19}, [r9:128]!
-; BE-I64-NEON-NEXT:    vst1.64 {d16, d17}, [r9:128]
-; BE-I64-NEON-NEXT:    add sp, sp, #152
-; BE-I64-NEON-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
-; BE-I64-NEON-NEXT:    add sp, sp, #4
-; BE-I64-NEON-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
-  %a = call <32 x iXLen> @llvm.lrint.v32iXLen.v16fp128(<32 x fp128> %x)
-  ret <32 x iXLen> %a
+  %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128> %x)
+  ret <16 x iXLen> %a
 }
-declare <32 x iXLen> @llvm.lrint.v32iXLen.v32fp128(<32 x fp128>)
+declare <16 x iXLen> @llvm.lrint.v16iXLen.v16fp128(<16 x fp128>)



More information about the llvm-commits mailing list