[llvm] [ARM][SDAG] Half promote llvm.lrint nodes. (PR #161088)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 2 14:45:20 PDT 2025
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/161088
From 9c966f70389338987de4ca02e0cec86cac315eeb Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 2 Oct 2025 21:53:18 +0100
Subject: [PATCH 1/2] [ARM][SDAG] Half promote llvm.lrint nodes.
As shown in #137101, fp16 lrint nodes are not handled correctly on Arm. This
adds soft-half promotion for them, reusing the function that promotes the
node's first FP operand (and can handle strict fp once that is added).
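For reference, the input this fixes is a scalar or vector fp16 lrint call, as
in the updated lrint-conv.ll test below (a minimal sketch taken from that
test, not new functionality):

  ; Previously crashed during type legalization; the half operand is now
  ; soft-promoted to f32 before lowering.
  define i32 @testmswh_builtin(half %x) {
  entry:
    %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
    ret i32 %0
  }

Without +fullfp16 this lowers to an __aeabi_h2f conversion followed by an
lrintf libcall; with +fullfp16 it can instead select vrintx.f16 plus
vcvt.s32.f16, as the regenerated CHECK lines show.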
---
.../SelectionDAG/LegalizeFloatTypes.cpp | 7 +-
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 +-
llvm/lib/Target/ARM/ARMISelLowering.cpp | 1 +
llvm/test/CodeGen/ARM/lrint-conv.ll | 40 +-
llvm/test/CodeGen/ARM/vector-lrint.ll | 1303 ++++++++++++++++-
5 files changed, 1317 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 83bb1dfe86c6a..1737a93837852 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -3740,7 +3740,10 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
+ case ISD::FP_TO_UINT:
+ case ISD::LRINT:
+ Res = SoftPromoteHalfOp_Op0WithStrict(N);
+ break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
Res = SoftPromoteHalfOp_FP_TO_XINT_SAT(N); break;
@@ -3819,7 +3822,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), RVT, Op);
}
-SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_Op0WithStrict(SDNode *N) {
EVT RVT = N->getValueType(0);
bool IsStrict = N->isStrictFPOpcode();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 586c3411791f9..d580ce0026e69 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -843,7 +843,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
- SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftPromoteHalfOp_Op0WithStrict(SDNode *N);
SDValue SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N);
SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index f4ac6bb76b3fe..2a40fb9b476f8 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1353,6 +1353,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FLOG, MVT::f16, Promote);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+ setOperationAction(ISD::LRINT, MVT::f16, Expand);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
index 23a2685aa1122..216488fe33313 100644
--- a/llvm/test/CodeGen/ARM/lrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -3,12 +3,35 @@
; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
-; FIXME: crash
-; define i32 @testmswh_builtin(half %x) {
-; entry:
-; %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
-; ret i32 %0
-; }
+define i32 @testmswh_builtin(half %x) {
+; CHECK-SOFT-LABEL: testmswh_builtin:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: .save {r11, lr}
+; CHECK-SOFT-NEXT: push {r11, lr}
+; CHECK-SOFT-NEXT: bl __aeabi_h2f
+; CHECK-SOFT-NEXT: pop {r11, lr}
+; CHECK-SOFT-NEXT: b lrintf
+;
+; CHECK-NOFP16-LABEL: testmswh_builtin:
+; CHECK-NOFP16: @ %bb.0: @ %entry
+; CHECK-NOFP16-NEXT: .save {r11, lr}
+; CHECK-NOFP16-NEXT: push {r11, lr}
+; CHECK-NOFP16-NEXT: vmov r0, s0
+; CHECK-NOFP16-NEXT: bl __aeabi_h2f
+; CHECK-NOFP16-NEXT: vmov s0, r0
+; CHECK-NOFP16-NEXT: pop {r11, lr}
+; CHECK-NOFP16-NEXT: b lrintf
+;
+; CHECK-FP16-LABEL: testmswh_builtin:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vrintx.f16 s0, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s0
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+ ret i32 %0
+}
define i32 @testmsws_builtin(float %x) {
; CHECK-LABEL: testmsws_builtin:
@@ -39,8 +62,3 @@ entry:
%0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x)
ret i32 %0
}
-
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-FP16: {{.*}}
-; CHECK-NOFP16: {{.*}}
-; CHECK-SOFT: {{.*}}
diff --git a/llvm/test/CodeGen/ARM/vector-lrint.ll b/llvm/test/CodeGen/ARM/vector-lrint.ll
index c1159da77707c..c3c88840b1a6a 100644
--- a/llvm/test/CodeGen/ARM/vector-lrint.ll
+++ b/llvm/test/CodeGen/ARM/vector-lrint.ll
@@ -9,31 +9,1290 @@
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I64
-; FIXME: crash "Do not know how to soft promote this operator's operand!"
-; define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
-; %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
-; ret <1 x iXLen> %a
-; }
-
-; define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
-; %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
-; ret <2 x iXLen> %a
-; }
+define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+; LE-I32-LABEL: lrint_v1f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_f2h
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v1f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r11, lr}
+; LE-I64-NEXT: push {r11, lr}
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_f2h
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d0[0], r0
+; LE-I64-NEXT: vmov.32 d0[1], r1
+; LE-I64-NEXT: pop {r11, pc}
+;
+; BE-I32-LABEL: lrint_v1f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_f2h
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v1f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r11, lr}
+; BE-I64-NEXT: push {r11, lr}
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_f2h
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d0, d16
+; BE-I64-NEXT: pop {r11, pc}
+ %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
+ ret <1 x iXLen> %a
+}
-; define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
-; %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
-; ret <4 x iXLen> %a
-; }
+define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+; LE-I32-LABEL: lrint_v2f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: .vsave {d8}
+; LE-I32-NEXT: vpush {d8}
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: vmov.f32 s16, s1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov r1, s16
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: mov r0, r1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: vorr d0, d8, d8
+; LE-I32-NEXT: vpop {d8}
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v2f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r11, lr}
+; LE-I64-NEXT: .vsave {d8, d9}
+; LE-I64-NEXT: vpush {d8, d9}
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vmov.f32 s16, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d9[0], r4
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov.32 d9[1], r5
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vorr q0, q4, q4
+; LE-I64-NEXT: vpop {d8, d9}
+; LE-I64-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-I32-LABEL: lrint_v2f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: .vsave {d8}
+; BE-I32-NEXT: vpush {d8}
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: vmov.f32 s16, s1
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov r1, s16
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: mov r0, r1
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: vrev64.32 d0, d8
+; BE-I32-NEXT: vpop {d8}
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v2f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r11, lr}
+; BE-I64-NEXT: .vsave {d8}
+; BE-I64-NEXT: vpush {d8}
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s16, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d8[0], r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d8[1], r5
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d8
+; BE-I64-NEXT: vrev64.32 d0, d16
+; BE-I64-NEXT: vpop {d8}
+; BE-I64-NEXT: pop {r4, r5, r11, pc}
+ %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
+ ret <2 x iXLen> %a
+}
-; define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
-; %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
-; ret <8 x iXLen> %a
-; }
+define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+; LE-I32-LABEL: lrint_v4f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r11, lr}
+; LE-I32-NEXT: push {r4, r5, r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11}
+; LE-I32-NEXT: vmov r0, s3
+; LE-I32-NEXT: vmov.f32 s16, s2
+; LE-I32-NEXT: vmov.f32 s18, s1
+; LE-I32-NEXT: vmov.f32 s20, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r4
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q5, q5
+; LE-I32-NEXT: vpop {d8, d9, d10, d11}
+; LE-I32-NEXT: pop {r4, r5, r11, pc}
+;
+; LE-I64-LABEL: lrint_v4f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT: .vsave {d12, d13}
+; LE-I64-NEXT: vpush {d12, d13}
+; LE-I64-NEXT: .vsave {d8, d9, d10}
+; LE-I64-NEXT: vpush {d8, d9, d10}
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vmov.f32 s16, s3
+; LE-I64-NEXT: vmov.f32 s20, s2
+; LE-I64-NEXT: vmov.f32 s18, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: vmov r0, s20
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d13[0], r5
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov.32 d13[1], r4
+; LE-I64-NEXT: vmov.32 d9[1], r6
+; LE-I64-NEXT: vmov.32 d12[1], r7
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vorr q0, q6, q6
+; LE-I64-NEXT: vorr q1, q4, q4
+; LE-I64-NEXT: vpop {d8, d9, d10}
+; LE-I64-NEXT: vpop {d12, d13}
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; BE-I32-LABEL: lrint_v4f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r11, lr}
+; BE-I32-NEXT: push {r4, r5, r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11}
+; BE-I32-NEXT: vmov r0, s3
+; BE-I32-NEXT: vmov.f32 s16, s2
+; BE-I32-NEXT: vmov.f32 s18, s1
+; BE-I32-NEXT: vmov.f32 s20, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s16
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s20
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: vmov r0, s18
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d11[1], r4
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q5
+; BE-I32-NEXT: vpop {d8, d9, d10, d11}
+; BE-I32-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-I64-LABEL: lrint_v4f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT: .vsave {d8, d9, d10}
+; BE-I64-NEXT: vpush {d8, d9, d10}
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s16, s3
+; BE-I64-NEXT: vmov.f32 s18, s2
+; BE-I64-NEXT: vmov.f32 s20, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s20
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: vmov r0, s18
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d9[0], r5
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEXT: vmov.32 d8[1], r6
+; BE-I64-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d9
+; BE-I64-NEXT: vrev64.32 d3, d8
+; BE-I64-NEXT: vrev64.32 d0, d10
+; BE-I64-NEXT: vrev64.32 d2, d16
+; BE-I64-NEXT: vpop {d8, d9, d10}
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r11, pc}
+ %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
+ ret <4 x iXLen> %a
+}
-; define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
-; %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
-; ret <16 x iXLen> %a
-; }
+define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
+; LE-I32-LABEL: lrint_v8f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT: vmov r0, s7
+; LE-I32-NEXT: vmov.f32 s18, s6
+; LE-I32-NEXT: vmov.f32 s16, s5
+; LE-I32-NEXT: vmov.f32 s20, s4
+; LE-I32-NEXT: vmov.f32 s22, s3
+; LE-I32-NEXT: vmov.f32 s24, s2
+; LE-I32-NEXT: vmov.f32 s26, s1
+; LE-I32-NEXT: vmov.f32 s28, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r8, r0
+; LE-I32-NEXT: vmov r0, s26
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r9, r0
+; LE-I32-NEXT: vmov r0, s22
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r6, r0
+; LE-I32-NEXT: vmov r0, s28
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r7, r0
+; LE-I32-NEXT: vmov r0, s24
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r4
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r7
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r6
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r9
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r8
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q6, q6
+; LE-I32-NEXT: vorr q1, q5, q5
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; LE-I64-LABEL: lrint_v8f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #8
+; LE-I64-NEXT: sub sp, sp, #8
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vstr s6, [sp, #4] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s16, s7
+; LE-I64-NEXT: vmov.f32 s18, s5
+; LE-I64-NEXT: vmov.f32 s20, s4
+; LE-I64-NEXT: vmov.f32 s22, s3
+; LE-I64-NEXT: vmov.f32 s24, s2
+; LE-I64-NEXT: vmov.f32 s26, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r9, r0
+; LE-I64-NEXT: vmov r0, s26
+; LE-I64-NEXT: str r1, [sp] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r10, r0
+; LE-I64-NEXT: vmov r0, s22
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s24
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s20
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r4
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r6
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r5
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r10
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d9[0], r9
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: ldr r0, [sp] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d15[1], r5
+; LE-I64-NEXT: vmov.32 d9[1], r0
+; LE-I64-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEXT: vmov.32 d11[1], r11
+; LE-I64-NEXT: vmov.32 d8[1], r4
+; LE-I64-NEXT: vmov.32 d14[1], r7
+; LE-I64-NEXT: vorr q0, q4, q4
+; LE-I64-NEXT: vmov.32 d12[1], r8
+; LE-I64-NEXT: vorr q1, q7, q7
+; LE-I64-NEXT: vmov.32 d10[1], r1
+; LE-I64-NEXT: vorr q2, q6, q6
+; LE-I64-NEXT: vorr q3, q5, q5
+; LE-I64-NEXT: add sp, sp, #8
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v8f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT: vmov r0, s1
+; BE-I32-NEXT: vmov.f32 s18, s7
+; BE-I32-NEXT: vmov.f32 s20, s6
+; BE-I32-NEXT: vmov.f32 s16, s5
+; BE-I32-NEXT: vmov.f32 s22, s4
+; BE-I32-NEXT: vmov.f32 s24, s3
+; BE-I32-NEXT: vmov.f32 s26, s2
+; BE-I32-NEXT: vmov.f32 s28, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r8, r0
+; BE-I32-NEXT: vmov r0, s24
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r9, r0
+; BE-I32-NEXT: vmov r0, s18
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r6, r0
+; BE-I32-NEXT: vmov r0, s26
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r7, r0
+; BE-I32-NEXT: vmov r0, s20
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s28
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s22
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r4
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r7
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r6
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r9
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: vmov r0, s16
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d12[1], r8
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q6
+; BE-I32-NEXT: vrev64.32 q1, q5
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; BE-I64-LABEL: lrint_v8f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: .pad #8
+; BE-I64-NEXT: sub sp, sp, #8
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s18, s7
+; BE-I64-NEXT: vmov.f32 s16, s6
+; BE-I64-NEXT: vmov.f32 s20, s5
+; BE-I64-NEXT: vmov.f32 s22, s4
+; BE-I64-NEXT: vmov.f32 s24, s3
+; BE-I64-NEXT: vmov.f32 s26, s2
+; BE-I64-NEXT: vmov.f32 s28, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r9, r0
+; BE-I64-NEXT: vmov r0, s28
+; BE-I64-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r10, r0
+; BE-I64-NEXT: vmov r0, s24
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s26
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s20
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: vmov r0, s22
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s18
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r4
+; BE-I64-NEXT: mov r11, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r6
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r5
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r10
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d8[0], r9
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; BE-I64-NEXT: vmov.32 d13[1], r5
+; BE-I64-NEXT: vmov.32 d8[1], r0
+; BE-I64-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEXT: vmov.32 d9[1], r11
+; BE-I64-NEXT: vmov.32 d14[1], r4
+; BE-I64-NEXT: vmov.32 d12[1], r7
+; BE-I64-NEXT: vmov.32 d10[1], r8
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d8
+; BE-I64-NEXT: vrev64.32 d3, d13
+; BE-I64-NEXT: vrev64.32 d5, d11
+; BE-I64-NEXT: vrev64.32 d7, d9
+; BE-I64-NEXT: vrev64.32 d0, d14
+; BE-I64-NEXT: vrev64.32 d2, d12
+; BE-I64-NEXT: vrev64.32 d4, d10
+; BE-I64-NEXT: vrev64.32 d6, d16
+; BE-I64-NEXT: add sp, sp, #8
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
+ ret <8 x iXLen> %a
+}
+
+define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
+; LE-I32-LABEL: lrint_v16f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: .pad #8
+; LE-I32-NEXT: sub sp, sp, #8
+; LE-I32-NEXT: vmov r0, s15
+; LE-I32-NEXT: vstr s13, [sp, #4] @ 4-byte Spill
+; LE-I32-NEXT: vmov.f32 s26, s14
+; LE-I32-NEXT: vstr s0, [sp] @ 4-byte Spill
+; LE-I32-NEXT: vmov.f32 s20, s12
+; LE-I32-NEXT: vmov.f32 s22, s11
+; LE-I32-NEXT: vmov.f32 s18, s10
+; LE-I32-NEXT: vmov.f32 s17, s9
+; LE-I32-NEXT: vmov.f32 s24, s8
+; LE-I32-NEXT: vmov.f32 s19, s7
+; LE-I32-NEXT: vmov.f32 s30, s6
+; LE-I32-NEXT: vmov.f32 s21, s5
+; LE-I32-NEXT: vmov.f32 s16, s4
+; LE-I32-NEXT: vmov.f32 s23, s3
+; LE-I32-NEXT: vmov.f32 s28, s2
+; LE-I32-NEXT: vmov.f32 s25, s1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r8, r0
+; LE-I32-NEXT: vmov r0, s17
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r9, r0
+; LE-I32-NEXT: vmov r0, s22
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r10, r0
+; LE-I32-NEXT: vmov r0, s21
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r7, r0
+; LE-I32-NEXT: vmov r0, s19
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s25
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s23
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r6, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: vmov r0, s26
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: vmov r0, s24
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: vmov r0, s30
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: vmov r0, s28
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r6
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r4
+; LE-I32-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r7
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r10
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r9
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r8
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q7, q7
+; LE-I32-NEXT: vorr q1, q4, q4
+; LE-I32-NEXT: vorr q2, q6, q6
+; LE-I32-NEXT: vorr q3, q5, q5
+; LE-I32-NEXT: add sp, sp, #8
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I64-LABEL: lrint_v16f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #120
+; LE-I64-NEXT: sub sp, sp, #120
+; LE-I64-NEXT: mov r11, r0
+; LE-I64-NEXT: vmov r0, s7
+; LE-I64-NEXT: vstr s15, [sp, #24] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s23, s13
+; LE-I64-NEXT: vstr s14, [sp, #100] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s25, s12
+; LE-I64-NEXT: vmov.f32 s27, s11
+; LE-I64-NEXT: vstr s10, [sp, #104] @ 4-byte Spill
+; LE-I64-NEXT: vstr s9, [sp, #108] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s24, s8
+; LE-I64-NEXT: vmov.f32 s19, s6
+; LE-I64-NEXT: vmov.f32 s29, s5
+; LE-I64-NEXT: vmov.f32 s17, s4
+; LE-I64-NEXT: vmov.f32 s16, s3
+; LE-I64-NEXT: vmov.f32 s21, s2
+; LE-I64-NEXT: vmov.f32 s26, s1
+; LE-I64-NEXT: vmov.f32 s18, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s25
+; LE-I64-NEXT: str r1, [sp, #56] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s27
+; LE-I64-NEXT: str r1, [sp, #116] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s29
+; LE-I64-NEXT: str r1, [sp, #112] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: vmov r0, s23
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: vmov.32 d17[0], r6
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s17
+; LE-I64-NEXT: vmov r8, s21
+; LE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; LE-I64-NEXT: vmov r10, s19
+; LE-I64-NEXT: vmov.32 d10[0], r5
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vmov.32 d11[0], r6
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: mov r0, r10
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d11[0], r7
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: mov r0, r8
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d11[1], r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: vmov.32 d10[1], r7
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d15[1], r4
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: vmov r0, s26
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vmov r8, s24
+; LE-I64-NEXT: vmov.32 d14[1], r9
+; LE-I64-NEXT: mov r10, r1
+; LE-I64-NEXT: vmov s24, r5
+; LE-I64-NEXT: vldr s0, [sp, #24] @ 4-byte Reload
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: vmov r7, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s24
+; LE-I64-NEXT: vmov s22, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s22
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: vmov s24, r6
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: mov r0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s24
+; LE-I64-NEXT: vmov s22, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s22
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: mov r0, r8
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov.32 d14[1], r5
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; LE-I64-NEXT: vmov s20, r0
+; LE-I64-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEXT: vmov r4, s0
+; LE-I64-NEXT: vldr s0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s20
+; LE-I64-NEXT: vmov s16, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: vmov s18, r7
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: mov r0, r4
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s18
+; LE-I64-NEXT: vmov s16, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d11[1], r6
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: vmov.32 d10[1], r4
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: vmov.32 d16[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; LE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vmov.32 d19[1], r0
+; LE-I64-NEXT: ldr r0, [sp, #116] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d21[1], r10
+; LE-I64-NEXT: vmov.32 d18[1], r0
+; LE-I64-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEXT: vmov.32 d17[1], r0
+; LE-I64-NEXT: add r0, r11, #64
+; LE-I64-NEXT: vmov.32 d16[1], r1
+; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT: vmov.32 d20[1], r9
+; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-I64-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-I64-NEXT: add sp, sp, #120
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v16f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: .pad #16
+; BE-I32-NEXT: sub sp, sp, #16
+; BE-I32-NEXT: vmov r0, s1
+; BE-I32-NEXT: vstr s14, [sp, #4] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s30, s15
+; BE-I32-NEXT: vstr s13, [sp, #12] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s17, s12
+; BE-I32-NEXT: vstr s10, [sp, #8] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s19, s11
+; BE-I32-NEXT: vstr s8, [sp] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s21, s9
+; BE-I32-NEXT: vmov.f32 s23, s7
+; BE-I32-NEXT: vmov.f32 s24, s6
+; BE-I32-NEXT: vmov.f32 s25, s5
+; BE-I32-NEXT: vmov.f32 s26, s4
+; BE-I32-NEXT: vmov.f32 s27, s3
+; BE-I32-NEXT: vmov.f32 s28, s2
+; BE-I32-NEXT: vmov.f32 s29, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r8, r0
+; BE-I32-NEXT: vmov r0, s27
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r9, r0
+; BE-I32-NEXT: vmov r0, s25
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r10, r0
+; BE-I32-NEXT: vmov r0, s23
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r7, r0
+; BE-I32-NEXT: vmov r0, s21
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s19
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s30
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r6, r0
+; BE-I32-NEXT: vmov r0, s17
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: vmov r0, s29
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: vmov r0, s28
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: vmov r0, s26
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEXT: vmov r0, s24
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #8] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r6
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r4
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r7
+; BE-I32-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r10
+; BE-I32-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r9
+; BE-I32-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #12] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d10[1], r8
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q5
+; BE-I32-NEXT: vrev64.32 q1, q7
+; BE-I32-NEXT: vrev64.32 q2, q6
+; BE-I32-NEXT: vrev64.32 q3, q4
+; BE-I32-NEXT: add sp, sp, #16
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I64-LABEL: lrint_v16f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: .pad #112
+; BE-I64-NEXT: sub sp, sp, #112
+; BE-I64-NEXT: mov r11, r0
+; BE-I64-NEXT: vmov r0, s14
+; BE-I64-NEXT: vmov.f32 s17, s15
+; BE-I64-NEXT: vstr s13, [sp, #52] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s21, s12
+; BE-I64-NEXT: vstr s10, [sp, #68] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s23, s11
+; BE-I64-NEXT: vstr s7, [sp, #72] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s19, s9
+; BE-I64-NEXT: vstr s4, [sp, #28] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s26, s8
+; BE-I64-NEXT: vmov.f32 s24, s6
+; BE-I64-NEXT: vmov.f32 s18, s5
+; BE-I64-NEXT: vmov.f32 s25, s3
+; BE-I64-NEXT: vmov.f32 s16, s2
+; BE-I64-NEXT: vmov.f32 s27, s1
+; BE-I64-NEXT: vmov.f32 s29, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r8, r0
+; BE-I64-NEXT: vmov r0, s29
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r9, r0
+; BE-I64-NEXT: vmov r0, s27
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s21
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: vmov r0, s25
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s23
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r5
+; BE-I64-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #96] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r6
+; BE-I64-NEXT: str r1, [sp, #92] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #80] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #56] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r9
+; BE-I64-NEXT: mov r10, r1
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: vmov r0, s17
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d10[0], r8
+; BE-I64-NEXT: vmov r6, s19
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: mov r0, r6
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: vmov r0, s18
+; BE-I64-NEXT: vmov.32 d10[1], r4
+; BE-I64-NEXT: vstr d10, [sp, #40] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: vmov.32 d11[1], r7
+; BE-I64-NEXT: vstr d11, [sp, #32] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.32 d15[1], r5
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vstr d15, [sp, #16] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vldr s0, [sp, #28] @ 4-byte Reload
+; BE-I64-NEXT: vmov r5, s26
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s26, r4
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d14[1], r10
+; BE-I64-NEXT: vmov r4, s24
+; BE-I64-NEXT: vstr d16, [sp] @ 8-byte Spill
+; BE-I64-NEXT: vstr d14, [sp, #8] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s26
+; BE-I64-NEXT: vmov s22, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s22
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: vmov s24, r6
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: mov r0, r4
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s24
+; BE-I64-NEXT: vmov s22, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s22
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: vmov.32 d14[1], r6
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: mov r0, r5
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #52] @ 4-byte Reload
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov.32 d13[1], r7
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #68] @ 4-byte Reload
+; BE-I64-NEXT: vmov s20, r0
+; BE-I64-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEXT: vmov r7, s0
+; BE-I64-NEXT: vldr s0, [sp, #72] @ 4-byte Reload
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s20
+; BE-I64-NEXT: vmov s16, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: vmov s18, r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: mov r0, r7
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s18
+; BE-I64-NEXT: vmov s16, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d24[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; BE-I64-NEXT: vldr d23, [sp, #56] @ 8-byte Reload
+; BE-I64-NEXT: vldr d20, [sp, #8] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d23[1], r0
+; BE-I64-NEXT: ldr r0, [sp, #92] @ 4-byte Reload
+; BE-I64-NEXT: vldr d22, [sp, #80] @ 8-byte Reload
+; BE-I64-NEXT: vldr d26, [sp, #16] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d21, d20
+; BE-I64-NEXT: vmov.32 d22[1], r0
+; BE-I64-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; BE-I64-NEXT: vldr d30, [sp] @ 8-byte Reload
+; BE-I64-NEXT: vldr d25, [sp, #96] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d20, d26
+; BE-I64-NEXT: vldr d26, [sp, #32] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d10[1], r5
+; BE-I64-NEXT: vmov.32 d12[1], r9
+; BE-I64-NEXT: vldr d28, [sp, #40] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d27, d26
+; BE-I64-NEXT: vmov.32 d25[1], r0
+; BE-I64-NEXT: add r0, r11, #64
+; BE-I64-NEXT: vmov.32 d30[1], r8
+; BE-I64-NEXT: vmov.32 d9[1], r6
+; BE-I64-NEXT: vrev64.32 d26, d28
+; BE-I64-NEXT: vrev64.32 d29, d10
+; BE-I64-NEXT: vmov.32 d24[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d12
+; BE-I64-NEXT: vrev64.32 d28, d23
+; BE-I64-NEXT: vrev64.32 d23, d22
+; BE-I64-NEXT: vrev64.32 d22, d30
+; BE-I64-NEXT: vrev64.32 d31, d25
+; BE-I64-NEXT: vrev64.32 d0, d9
+; BE-I64-NEXT: vrev64.32 d30, d24
+; BE-I64-NEXT: vst1.64 {d0, d1}, [r0:128]!
+; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 d19, d13
+; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128]
+; BE-I64-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; BE-I64-NEXT: vrev64.32 d18, d14
+; BE-I64-NEXT: vst1.64 {d22, d23}, [r11:128]!
+; BE-I64-NEXT: vrev64.32 d17, d15
+; BE-I64-NEXT: vrev64.32 d16, d11
+; BE-I64-NEXT: vst1.64 {d18, d19}, [r11:128]!
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]
+; BE-I64-NEXT: add sp, sp, #112
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
+ ret <16 x iXLen> %a
+}
define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
; LE-I32-LABEL: lrint_v1f32:
From c0a41605d6eee890c35b3c8737e133157abafc88 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 2 Oct 2025 22:45:09 +0100
Subject: [PATCH 2/2] Make the fp16 run line use +fp-armv8
---
llvm/test/CodeGen/ARM/lrint-conv.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
index 216488fe33313..9b471cc11d896 100644
--- a/llvm/test/CodeGen/ARM/lrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
-; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
define i32 @testmswh_builtin(half %x) {
; CHECK-SOFT-LABEL: testmswh_builtin: