[llvm] [ARM][SDAG] Half promote llvm.lrint nodes. (PR #161088)

David Green via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 2 14:45:20 PDT 2025


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/161088

>From 9c966f70389338987de4ca02e0cec86cac315eeb Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 2 Oct 2025 21:53:18 +0100
Subject: [PATCH 1/2] [ARM][SDAG] Half promote llvm.lrint nodes.

As shown in #137101, fp16 lrint intrinsics are not handled correctly on Arm. This
adds soft-half promotion for them, reusing the function that promotes a value with
operands (and which can handle strict fp once support for that is added).
---
 .../SelectionDAG/LegalizeFloatTypes.cpp       |    7 +-
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |    2 +-
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |    1 +
 llvm/test/CodeGen/ARM/lrint-conv.ll           |   40 +-
 llvm/test/CodeGen/ARM/vector-lrint.ll         | 1303 ++++++++++++++++-
 5 files changed, 1317 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 83bb1dfe86c6a..1737a93837852 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -3740,7 +3740,10 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
   case ISD::STRICT_FP_TO_SINT:
   case ISD::STRICT_FP_TO_UINT:
   case ISD::FP_TO_SINT:
-  case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
+  case ISD::FP_TO_UINT:
+  case ISD::LRINT:
+    Res = SoftPromoteHalfOp_Op0WithStrict(N);
+    break;
   case ISD::FP_TO_SINT_SAT:
   case ISD::FP_TO_UINT_SAT:
                         Res = SoftPromoteHalfOp_FP_TO_XINT_SAT(N); break;
@@ -3819,7 +3822,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
   return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), RVT, Op);
 }
 
-SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_Op0WithStrict(SDNode *N) {
   EVT RVT = N->getValueType(0);
   bool IsStrict = N->isStrictFPOpcode();
   SDValue Op = N->getOperand(IsStrict ? 1 : 0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 586c3411791f9..d580ce0026e69 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -843,7 +843,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
   SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
-  SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
+  SDValue SoftPromoteHalfOp_Op0WithStrict(SDNode *N);
   SDValue SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N);
   SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
   SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index f4ac6bb76b3fe..2a40fb9b476f8 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1353,6 +1353,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
     setOperationAction(ISD::FLOG, MVT::f16, Promote);
     setOperationAction(ISD::FLOG10, MVT::f16, Promote);
     setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+    setOperationAction(ISD::LRINT, MVT::f16, Expand);
 
     setOperationAction(ISD::FROUND, MVT::f16, Legal);
     setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
index 23a2685aa1122..216488fe33313 100644
--- a/llvm/test/CodeGen/ARM/lrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -3,12 +3,35 @@
 ; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
 ; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
 
-; FIXME: crash
-; define i32 @testmswh_builtin(half %x) {
-; entry:
-;   %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
-;   ret i32 %0
-; }
+define i32 @testmswh_builtin(half %x) {
+; CHECK-SOFT-LABEL: testmswh_builtin:
+; CHECK-SOFT:       @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT:    .save {r11, lr}
+; CHECK-SOFT-NEXT:    push {r11, lr}
+; CHECK-SOFT-NEXT:    bl __aeabi_h2f
+; CHECK-SOFT-NEXT:    pop {r11, lr}
+; CHECK-SOFT-NEXT:    b lrintf
+;
+; CHECK-NOFP16-LABEL: testmswh_builtin:
+; CHECK-NOFP16:       @ %bb.0: @ %entry
+; CHECK-NOFP16-NEXT:    .save {r11, lr}
+; CHECK-NOFP16-NEXT:    push {r11, lr}
+; CHECK-NOFP16-NEXT:    vmov r0, s0
+; CHECK-NOFP16-NEXT:    bl __aeabi_h2f
+; CHECK-NOFP16-NEXT:    vmov s0, r0
+; CHECK-NOFP16-NEXT:    pop {r11, lr}
+; CHECK-NOFP16-NEXT:    b lrintf
+;
+; CHECK-FP16-LABEL: testmswh_builtin:
+; CHECK-FP16:       @ %bb.0: @ %entry
+; CHECK-FP16-NEXT:    vrintx.f16 s0, s0
+; CHECK-FP16-NEXT:    vcvt.s32.f16 s0, s0
+; CHECK-FP16-NEXT:    vmov r0, s0
+; CHECK-FP16-NEXT:    bx lr
+entry:
+  %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+  ret i32 %0
+}
 
 define i32 @testmsws_builtin(float %x) {
 ; CHECK-LABEL: testmsws_builtin:
@@ -39,8 +62,3 @@ entry:
   %0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x)
   ret i32 %0
 }
-
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-FP16: {{.*}}
-; CHECK-NOFP16: {{.*}}
-; CHECK-SOFT: {{.*}}
diff --git a/llvm/test/CodeGen/ARM/vector-lrint.ll b/llvm/test/CodeGen/ARM/vector-lrint.ll
index c1159da77707c..c3c88840b1a6a 100644
--- a/llvm/test/CodeGen/ARM/vector-lrint.ll
+++ b/llvm/test/CodeGen/ARM/vector-lrint.ll
@@ -9,31 +9,1290 @@
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I32
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I64
 
-; FIXME: crash "Do not know how to soft promote this operator's operand!"
-; define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
-;   %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
-;   ret <1 x iXLen> %a
-; }
-
-; define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
-;   %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
-;   ret <2 x iXLen> %a
-; }
+define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+; LE-I32-LABEL: lrint_v1f16:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    vmov r0, s0
+; LE-I32-NEXT:    bl __aeabi_f2h
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v1f16:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r11, lr}
+; LE-I64-NEXT:    push {r11, lr}
+; LE-I64-NEXT:    vmov r0, s0
+; LE-I64-NEXT:    bl __aeabi_f2h
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d0[0], r0
+; LE-I64-NEXT:    vmov.32 d0[1], r1
+; LE-I64-NEXT:    pop {r11, pc}
+;
+; BE-I32-LABEL: lrint_v1f16:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    vmov r0, s0
+; BE-I32-NEXT:    bl __aeabi_f2h
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v1f16:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r11, lr}
+; BE-I64-NEXT:    push {r11, lr}
+; BE-I64-NEXT:    vmov r0, s0
+; BE-I64-NEXT:    bl __aeabi_f2h
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEXT:    pop {r11, pc}
+  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
+  ret <1 x iXLen> %a
+}
 
-; define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
-;   %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
-;   ret <4 x iXLen> %a
-; }
+define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+; LE-I32-LABEL: lrint_v2f16:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r11, lr}
+; LE-I32-NEXT:    push {r11, lr}
+; LE-I32-NEXT:    .vsave {d8}
+; LE-I32-NEXT:    vpush {d8}
+; LE-I32-NEXT:    vmov r0, s0
+; LE-I32-NEXT:    vmov.f32 s16, s1
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov r1, s16
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    mov r0, r1
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    vorr d0, d8, d8
+; LE-I32-NEXT:    vpop {d8}
+; LE-I32-NEXT:    pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v2f16:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r11, lr}
+; LE-I64-NEXT:    push {r4, r5, r11, lr}
+; LE-I64-NEXT:    .vsave {d8, d9}
+; LE-I64-NEXT:    vpush {d8, d9}
+; LE-I64-NEXT:    vmov r0, s1
+; LE-I64-NEXT:    vmov.f32 s16, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    mov r4, r0
+; LE-I64-NEXT:    vmov r0, s16
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    vmov.32 d9[0], r4
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vmov.32 d9[1], r5
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vorr q0, q4, q4
+; LE-I64-NEXT:    vpop {d8, d9}
+; LE-I64-NEXT:    pop {r4, r5, r11, pc}
+;
+; BE-I32-LABEL: lrint_v2f16:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r11, lr}
+; BE-I32-NEXT:    push {r11, lr}
+; BE-I32-NEXT:    .vsave {d8}
+; BE-I32-NEXT:    vpush {d8}
+; BE-I32-NEXT:    vmov r0, s0
+; BE-I32-NEXT:    vmov.f32 s16, s1
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov r1, s16
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    mov r0, r1
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    vrev64.32 d0, d8
+; BE-I32-NEXT:    vpop {d8}
+; BE-I32-NEXT:    pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v2f16:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r11, lr}
+; BE-I64-NEXT:    push {r4, r5, r11, lr}
+; BE-I64-NEXT:    .vsave {d8}
+; BE-I64-NEXT:    vpush {d8}
+; BE-I64-NEXT:    vmov r0, s1
+; BE-I64-NEXT:    vmov.f32 s16, s0
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    mov r4, r0
+; BE-I64-NEXT:    vmov r0, s16
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    vmov.32 d8[0], r4
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d8[1], r5
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d1, d8
+; BE-I64-NEXT:    vrev64.32 d0, d16
+; BE-I64-NEXT:    vpop {d8}
+; BE-I64-NEXT:    pop {r4, r5, r11, pc}
+  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
+  ret <2 x iXLen> %a
+}
 
-; define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
-;   %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
-;   ret <8 x iXLen> %a
-; }
+define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+; LE-I32-LABEL: lrint_v4f16:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, r5, r11, lr}
+; LE-I32-NEXT:    push {r4, r5, r11, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11}
+; LE-I32-NEXT:    vmov r0, s3
+; LE-I32-NEXT:    vmov.f32 s16, s2
+; LE-I32-NEXT:    vmov.f32 s18, s1
+; LE-I32-NEXT:    vmov.f32 s20, s0
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    vmov r0, s16
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r5, r0
+; LE-I32-NEXT:    vmov r0, s20
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r5
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    vmov r0, s18
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    vmov.32 d11[1], r4
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    vorr q0, q5, q5
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11}
+; LE-I32-NEXT:    pop {r4, r5, r11, pc}
+;
+; LE-I64-LABEL: lrint_v4f16:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT:    .vsave {d12, d13}
+; LE-I64-NEXT:    vpush {d12, d13}
+; LE-I64-NEXT:    .vsave {d8, d9, d10}
+; LE-I64-NEXT:    vpush {d8, d9, d10}
+; LE-I64-NEXT:    vmov r0, s1
+; LE-I64-NEXT:    vmov.f32 s16, s3
+; LE-I64-NEXT:    vmov.f32 s20, s2
+; LE-I64-NEXT:    vmov.f32 s18, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    mov r5, r0
+; LE-I64-NEXT:    vmov r0, s18
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r7, r0
+; LE-I64-NEXT:    vmov r0, s16
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov s0, r7
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    vmov r0, s20
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    vmov.32 d13[0], r5
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vmov.32 d13[1], r4
+; LE-I64-NEXT:    vmov.32 d9[1], r6
+; LE-I64-NEXT:    vmov.32 d12[1], r7
+; LE-I64-NEXT:    vmov.32 d8[1], r1
+; LE-I64-NEXT:    vorr q0, q6, q6
+; LE-I64-NEXT:    vorr q1, q4, q4
+; LE-I64-NEXT:    vpop {d8, d9, d10}
+; LE-I64-NEXT:    vpop {d12, d13}
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+;
+; BE-I32-LABEL: lrint_v4f16:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, r5, r11, lr}
+; BE-I32-NEXT:    push {r4, r5, r11, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11}
+; BE-I32-NEXT:    vmov r0, s3
+; BE-I32-NEXT:    vmov.f32 s16, s2
+; BE-I32-NEXT:    vmov.f32 s18, s1
+; BE-I32-NEXT:    vmov.f32 s20, s0
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    vmov r0, s16
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r5, r0
+; BE-I32-NEXT:    vmov r0, s20
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r5
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEXT:    vmov r0, s18
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    vmov.32 d11[1], r4
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q5
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11}
+; BE-I32-NEXT:    pop {r4, r5, r11, pc}
+;
+; BE-I64-LABEL: lrint_v4f16:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT:    .vsave {d8, d9, d10}
+; BE-I64-NEXT:    vpush {d8, d9, d10}
+; BE-I64-NEXT:    vmov r0, s1
+; BE-I64-NEXT:    vmov.f32 s16, s3
+; BE-I64-NEXT:    vmov.f32 s18, s2
+; BE-I64-NEXT:    vmov.f32 s20, s0
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    mov r5, r0
+; BE-I64-NEXT:    vmov r0, s20
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r7, r0
+; BE-I64-NEXT:    vmov r0, s16
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov s0, r7
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d8[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    vmov r0, s18
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    vmov.32 d9[0], r5
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov.32 d9[1], r4
+; BE-I64-NEXT:    vmov.32 d8[1], r6
+; BE-I64-NEXT:    vmov.32 d10[1], r7
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d1, d9
+; BE-I64-NEXT:    vrev64.32 d3, d8
+; BE-I64-NEXT:    vrev64.32 d0, d10
+; BE-I64-NEXT:    vrev64.32 d2, d16
+; BE-I64-NEXT:    vpop {d8, d9, d10}
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r11, pc}
+  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
+  ret <4 x iXLen> %a
+}
 
-; define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
-;   %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
-;   ret <16 x iXLen> %a
-; }
+define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
+; LE-I32-LABEL: lrint_v8f16:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT:    vmov r0, s7
+; LE-I32-NEXT:    vmov.f32 s18, s6
+; LE-I32-NEXT:    vmov.f32 s16, s5
+; LE-I32-NEXT:    vmov.f32 s20, s4
+; LE-I32-NEXT:    vmov.f32 s22, s3
+; LE-I32-NEXT:    vmov.f32 s24, s2
+; LE-I32-NEXT:    vmov.f32 s26, s1
+; LE-I32-NEXT:    vmov.f32 s28, s0
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    mov r8, r0
+; LE-I32-NEXT:    vmov r0, s26
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r9, r0
+; LE-I32-NEXT:    vmov r0, s22
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r6, r0
+; LE-I32-NEXT:    vmov r0, s28
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r7, r0
+; LE-I32-NEXT:    vmov r0, s24
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    vmov r0, s18
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r5, r0
+; LE-I32-NEXT:    vmov r0, s20
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r5
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r4
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r7
+; LE-I32-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r6
+; LE-I32-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r9
+; LE-I32-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEXT:    vmov r0, s16
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    vmov.32 d11[1], r8
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    vorr q0, q6, q6
+; LE-I32-NEXT:    vorr q1, q5, q5
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; LE-I64-LABEL: lrint_v8f16:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    .pad #4
+; LE-I64-NEXT:    sub sp, sp, #4
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    .pad #8
+; LE-I64-NEXT:    sub sp, sp, #8
+; LE-I64-NEXT:    vmov r0, s1
+; LE-I64-NEXT:    vstr s6, [sp, #4] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.f32 s16, s7
+; LE-I64-NEXT:    vmov.f32 s18, s5
+; LE-I64-NEXT:    vmov.f32 s20, s4
+; LE-I64-NEXT:    vmov.f32 s22, s3
+; LE-I64-NEXT:    vmov.f32 s24, s2
+; LE-I64-NEXT:    vmov.f32 s26, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    mov r9, r0
+; LE-I64-NEXT:    vmov r0, s26
+; LE-I64-NEXT:    str r1, [sp] @ 4-byte Spill
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r10, r0
+; LE-I64-NEXT:    vmov r0, s22
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r5, r0
+; LE-I64-NEXT:    vmov r0, s24
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r7, r0
+; LE-I64-NEXT:    vmov r0, s18
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r6, r0
+; LE-I64-NEXT:    vmov r0, s20
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r4, r0
+; LE-I64-NEXT:    vmov r0, s16
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov s0, r4
+; LE-I64-NEXT:    mov r11, r1
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov s0, r6
+; LE-I64-NEXT:    mov r8, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov s0, r7
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov s0, r5
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov s0, r10
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    vmov r0, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    vmov.32 d9[0], r9
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    ldr r0, [sp] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d15[1], r5
+; LE-I64-NEXT:    vmov.32 d9[1], r0
+; LE-I64-NEXT:    vmov.32 d13[1], r6
+; LE-I64-NEXT:    vmov.32 d11[1], r11
+; LE-I64-NEXT:    vmov.32 d8[1], r4
+; LE-I64-NEXT:    vmov.32 d14[1], r7
+; LE-I64-NEXT:    vorr q0, q4, q4
+; LE-I64-NEXT:    vmov.32 d12[1], r8
+; LE-I64-NEXT:    vorr q1, q7, q7
+; LE-I64-NEXT:    vmov.32 d10[1], r1
+; LE-I64-NEXT:    vorr q2, q6, q6
+; LE-I64-NEXT:    vorr q3, q5, q5
+; LE-I64-NEXT:    add sp, sp, #8
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    add sp, sp, #4
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v8f16:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT:    vmov r0, s1
+; BE-I32-NEXT:    vmov.f32 s18, s7
+; BE-I32-NEXT:    vmov.f32 s20, s6
+; BE-I32-NEXT:    vmov.f32 s16, s5
+; BE-I32-NEXT:    vmov.f32 s22, s4
+; BE-I32-NEXT:    vmov.f32 s24, s3
+; BE-I32-NEXT:    vmov.f32 s26, s2
+; BE-I32-NEXT:    vmov.f32 s28, s0
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    mov r8, r0
+; BE-I32-NEXT:    vmov r0, s24
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r9, r0
+; BE-I32-NEXT:    vmov r0, s18
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r6, r0
+; BE-I32-NEXT:    vmov r0, s26
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r7, r0
+; BE-I32-NEXT:    vmov r0, s20
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    vmov r0, s28
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r5, r0
+; BE-I32-NEXT:    vmov r0, s22
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r5
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r4
+; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r7
+; BE-I32-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r6
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r9
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    vmov r0, s16
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    vmov.32 d12[1], r8
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d10[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q6
+; BE-I32-NEXT:    vrev64.32 q1, q5
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; BE-I64-LABEL: lrint_v8f16:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    .pad #4
+; BE-I64-NEXT:    sub sp, sp, #4
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT:    .pad #8
+; BE-I64-NEXT:    sub sp, sp, #8
+; BE-I64-NEXT:    vmov r0, s1
+; BE-I64-NEXT:    vmov.f32 s18, s7
+; BE-I64-NEXT:    vmov.f32 s16, s6
+; BE-I64-NEXT:    vmov.f32 s20, s5
+; BE-I64-NEXT:    vmov.f32 s22, s4
+; BE-I64-NEXT:    vmov.f32 s24, s3
+; BE-I64-NEXT:    vmov.f32 s26, s2
+; BE-I64-NEXT:    vmov.f32 s28, s0
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    mov r9, r0
+; BE-I64-NEXT:    vmov r0, s28
+; BE-I64-NEXT:    str r1, [sp, #4] @ 4-byte Spill
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r10, r0
+; BE-I64-NEXT:    vmov r0, s24
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r5, r0
+; BE-I64-NEXT:    vmov r0, s26
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r7, r0
+; BE-I64-NEXT:    vmov r0, s20
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r6, r0
+; BE-I64-NEXT:    vmov r0, s22
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r4, r0
+; BE-I64-NEXT:    vmov r0, s18
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov s0, r4
+; BE-I64-NEXT:    mov r11, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov s0, r6
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov s0, r7
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov s0, r5
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov s0, r10
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    vmov r0, s16
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    vmov.32 d8[0], r9
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #4] @ 4-byte Reload
+; BE-I64-NEXT:    vmov.32 d13[1], r5
+; BE-I64-NEXT:    vmov.32 d8[1], r0
+; BE-I64-NEXT:    vmov.32 d11[1], r6
+; BE-I64-NEXT:    vmov.32 d9[1], r11
+; BE-I64-NEXT:    vmov.32 d14[1], r4
+; BE-I64-NEXT:    vmov.32 d12[1], r7
+; BE-I64-NEXT:    vmov.32 d10[1], r8
+; BE-I64-NEXT:    vmov.32 d16[1], r1
+; BE-I64-NEXT:    vrev64.32 d1, d8
+; BE-I64-NEXT:    vrev64.32 d3, d13
+; BE-I64-NEXT:    vrev64.32 d5, d11
+; BE-I64-NEXT:    vrev64.32 d7, d9
+; BE-I64-NEXT:    vrev64.32 d0, d14
+; BE-I64-NEXT:    vrev64.32 d2, d12
+; BE-I64-NEXT:    vrev64.32 d4, d10
+; BE-I64-NEXT:    vrev64.32 d6, d16
+; BE-I64-NEXT:    add sp, sp, #8
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT:    add sp, sp, #4
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
+  ret <8 x iXLen> %a
+}
+
+define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
+; LE-I32-LABEL: lrint_v16f16:
+; LE-I32:       @ %bb.0:
+; LE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    .pad #8
+; LE-I32-NEXT:    sub sp, sp, #8
+; LE-I32-NEXT:    vmov r0, s15
+; LE-I32-NEXT:    vstr s13, [sp, #4] @ 4-byte Spill
+; LE-I32-NEXT:    vmov.f32 s26, s14
+; LE-I32-NEXT:    vstr s0, [sp] @ 4-byte Spill
+; LE-I32-NEXT:    vmov.f32 s20, s12
+; LE-I32-NEXT:    vmov.f32 s22, s11
+; LE-I32-NEXT:    vmov.f32 s18, s10
+; LE-I32-NEXT:    vmov.f32 s17, s9
+; LE-I32-NEXT:    vmov.f32 s24, s8
+; LE-I32-NEXT:    vmov.f32 s19, s7
+; LE-I32-NEXT:    vmov.f32 s30, s6
+; LE-I32-NEXT:    vmov.f32 s21, s5
+; LE-I32-NEXT:    vmov.f32 s16, s4
+; LE-I32-NEXT:    vmov.f32 s23, s3
+; LE-I32-NEXT:    vmov.f32 s28, s2
+; LE-I32-NEXT:    vmov.f32 s25, s1
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    mov r8, r0
+; LE-I32-NEXT:    vmov r0, s17
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r9, r0
+; LE-I32-NEXT:    vmov r0, s22
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r10, r0
+; LE-I32-NEXT:    vmov r0, s21
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r7, r0
+; LE-I32-NEXT:    vmov r0, s19
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r4, r0
+; LE-I32-NEXT:    vmov r0, s25
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r5, r0
+; LE-I32-NEXT:    vmov r0, s23
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    mov r6, r0
+; LE-I32-NEXT:    vmov r0, s20
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d10[0], r0
+; LE-I32-NEXT:    vmov r0, s26
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d11[0], r0
+; LE-I32-NEXT:    vmov r0, s24
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d12[0], r0
+; LE-I32-NEXT:    vmov r0, s18
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d13[0], r0
+; LE-I32-NEXT:    vmov r0, s16
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d8[0], r0
+; LE-I32-NEXT:    vmov r0, s30
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d9[0], r0
+; LE-I32-NEXT:    vmov r0, s28
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vldr s0, [sp] @ 4-byte Reload
+; LE-I32-NEXT:    vmov.32 d15[0], r0
+; LE-I32-NEXT:    vmov r0, s0
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r6
+; LE-I32-NEXT:    vmov.32 d14[0], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r5
+; LE-I32-NEXT:    vmov.32 d15[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r4
+; LE-I32-NEXT:    vmov.32 d14[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r7
+; LE-I32-NEXT:    vmov.32 d9[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r10
+; LE-I32-NEXT:    vmov.32 d8[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov s0, r9
+; LE-I32-NEXT:    vmov.32 d13[1], r0
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I32-NEXT:    vmov.32 d12[1], r0
+; LE-I32-NEXT:    vmov r0, s0
+; LE-I32-NEXT:    bl __aeabi_h2f
+; LE-I32-NEXT:    vmov s0, r0
+; LE-I32-NEXT:    vmov.32 d11[1], r8
+; LE-I32-NEXT:    bl lrintf
+; LE-I32-NEXT:    vmov.32 d10[1], r0
+; LE-I32-NEXT:    vorr q0, q7, q7
+; LE-I32-NEXT:    vorr q1, q4, q4
+; LE-I32-NEXT:    vorr q2, q6, q6
+; LE-I32-NEXT:    vorr q3, q5, q5
+; LE-I32-NEXT:    add sp, sp, #8
+; LE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I64-LABEL: lrint_v16f16:
+; LE-I64:       @ %bb.0:
+; LE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT:    .pad #4
+; LE-I64-NEXT:    sub sp, sp, #4
+; LE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    .pad #120
+; LE-I64-NEXT:    sub sp, sp, #120
+; LE-I64-NEXT:    mov r11, r0
+; LE-I64-NEXT:    vmov r0, s7
+; LE-I64-NEXT:    vstr s15, [sp, #24] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.f32 s23, s13
+; LE-I64-NEXT:    vstr s14, [sp, #100] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.f32 s25, s12
+; LE-I64-NEXT:    vmov.f32 s27, s11
+; LE-I64-NEXT:    vstr s10, [sp, #104] @ 4-byte Spill
+; LE-I64-NEXT:    vstr s9, [sp, #108] @ 4-byte Spill
+; LE-I64-NEXT:    vmov.f32 s24, s8
+; LE-I64-NEXT:    vmov.f32 s19, s6
+; LE-I64-NEXT:    vmov.f32 s29, s5
+; LE-I64-NEXT:    vmov.f32 s17, s4
+; LE-I64-NEXT:    vmov.f32 s16, s3
+; LE-I64-NEXT:    vmov.f32 s21, s2
+; LE-I64-NEXT:    vmov.f32 s26, s1
+; LE-I64-NEXT:    vmov.f32 s18, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    mov r7, r0
+; LE-I64-NEXT:    vmov r0, s25
+; LE-I64-NEXT:    str r1, [sp, #56] @ 4-byte Spill
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    mov r5, r0
+; LE-I64-NEXT:    vmov r0, s27
+; LE-I64-NEXT:    str r1, [sp, #116] @ 4-byte Spill
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    mov r6, r0
+; LE-I64-NEXT:    vmov r0, s29
+; LE-I64-NEXT:    str r1, [sp, #112] @ 4-byte Spill
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    vmov r0, s23
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    add lr, sp, #80
+; LE-I64-NEXT:    vmov.32 d17[0], r6
+; LE-I64-NEXT:    vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    mov r6, r0
+; LE-I64-NEXT:    vmov r0, s17
+; LE-I64-NEXT:    vmov r8, s21
+; LE-I64-NEXT:    str r1, [sp, #76] @ 4-byte Spill
+; LE-I64-NEXT:    vmov r10, s19
+; LE-I64-NEXT:    vmov.32 d10[0], r5
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vmov.32 d11[0], r6
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    mov r0, r10
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    vmov.32 d11[0], r7
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    mov r0, r8
+; LE-I64-NEXT:    mov r7, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r6, r0
+; LE-I64-NEXT:    ldr r0, [sp, #56] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d11[1], r0
+; LE-I64-NEXT:    vmov r0, s18
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    mov r5, r0
+; LE-I64-NEXT:    vmov r0, s16
+; LE-I64-NEXT:    vmov.32 d10[1], r7
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov s0, r0
+; LE-I64-NEXT:    vmov.32 d15[1], r4
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d9[0], r0
+; LE-I64-NEXT:    vmov r0, s26
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vmov r8, s24
+; LE-I64-NEXT:    vmov.32 d14[1], r9
+; LE-I64-NEXT:    mov r10, r1
+; LE-I64-NEXT:    vmov s24, r5
+; LE-I64-NEXT:    vldr s0, [sp, #24] @ 4-byte Reload
+; LE-I64-NEXT:    vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT:    vmov r7, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEXT:    vmov s22, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s22
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d14[0], r0
+; LE-I64-NEXT:    vmov s24, r6
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d15[0], r0
+; LE-I64-NEXT:    mov r0, r7
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov.f32 s0, s24
+; LE-I64-NEXT:    vmov s22, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s22
+; LE-I64-NEXT:    vmov.32 d8[0], r0
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    mov r9, r1
+; LE-I64-NEXT:    vmov.32 d15[1], r6
+; LE-I64-NEXT:    vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d13[0], r0
+; LE-I64-NEXT:    mov r0, r8
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vldr s0, [sp, #100] @ 4-byte Reload
+; LE-I64-NEXT:    mov r7, r0
+; LE-I64-NEXT:    vmov.32 d14[1], r5
+; LE-I64-NEXT:    vmov r0, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vldr s0, [sp, #104] @ 4-byte Reload
+; LE-I64-NEXT:    vmov s20, r0
+; LE-I64-NEXT:    vmov.32 d13[1], r6
+; LE-I64-NEXT:    vmov r4, s0
+; LE-I64-NEXT:    vldr s0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEXT:    vmov r0, s0
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov.f32 s0, s20
+; LE-I64-NEXT:    vmov s16, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s16
+; LE-I64-NEXT:    mov r5, r1
+; LE-I64-NEXT:    vmov.32 d12[0], r0
+; LE-I64-NEXT:    vmov s18, r7
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.32 d11[0], r0
+; LE-I64-NEXT:    mov r0, r4
+; LE-I64-NEXT:    mov r6, r1
+; LE-I64-NEXT:    bl __aeabi_h2f
+; LE-I64-NEXT:    vmov.f32 s0, s18
+; LE-I64-NEXT:    vmov s16, r0
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    vmov.f32 s0, s16
+; LE-I64-NEXT:    vmov.32 d10[0], r0
+; LE-I64-NEXT:    mov r4, r1
+; LE-I64-NEXT:    vmov.32 d11[1], r6
+; LE-I64-NEXT:    bl lrintf
+; LE-I64-NEXT:    add lr, sp, #80
+; LE-I64-NEXT:    vmov.32 d10[1], r4
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #40
+; LE-I64-NEXT:    vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #8
+; LE-I64-NEXT:    vmov.32 d16[0], r0
+; LE-I64-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; LE-I64-NEXT:    vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #24
+; LE-I64-NEXT:    vmov.32 d19[1], r0
+; LE-I64-NEXT:    ldr r0, [sp, #116] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d21[1], r10
+; LE-I64-NEXT:    vmov.32 d18[1], r0
+; LE-I64-NEXT:    ldr r0, [sp, #112] @ 4-byte Reload
+; LE-I64-NEXT:    vmov.32 d12[1], r5
+; LE-I64-NEXT:    vmov.32 d17[1], r0
+; LE-I64-NEXT:    add r0, r11, #64
+; LE-I64-NEXT:    vmov.32 d16[1], r1
+; LE-I64-NEXT:    vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT:    vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT:    vmov.32 d20[1], r9
+; LE-I64-NEXT:    vst1.64 {d12, d13}, [r0:128]
+; LE-I64-NEXT:    vst1.64 {d14, d15}, [r11:128]!
+; LE-I64-NEXT:    vst1.64 {d20, d21}, [r11:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    add lr, sp, #56
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEXT:    vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; LE-I64-NEXT:    add sp, sp, #120
+; LE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT:    add sp, sp, #4
+; LE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v16f16:
+; BE-I32:       @ %bb.0:
+; BE-I32-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    .pad #16
+; BE-I32-NEXT:    sub sp, sp, #16
+; BE-I32-NEXT:    vmov r0, s1
+; BE-I32-NEXT:    vstr s14, [sp, #4] @ 4-byte Spill
+; BE-I32-NEXT:    vmov.f32 s30, s15
+; BE-I32-NEXT:    vstr s13, [sp, #12] @ 4-byte Spill
+; BE-I32-NEXT:    vmov.f32 s17, s12
+; BE-I32-NEXT:    vstr s10, [sp, #8] @ 4-byte Spill
+; BE-I32-NEXT:    vmov.f32 s19, s11
+; BE-I32-NEXT:    vstr s8, [sp] @ 4-byte Spill
+; BE-I32-NEXT:    vmov.f32 s21, s9
+; BE-I32-NEXT:    vmov.f32 s23, s7
+; BE-I32-NEXT:    vmov.f32 s24, s6
+; BE-I32-NEXT:    vmov.f32 s25, s5
+; BE-I32-NEXT:    vmov.f32 s26, s4
+; BE-I32-NEXT:    vmov.f32 s27, s3
+; BE-I32-NEXT:    vmov.f32 s28, s2
+; BE-I32-NEXT:    vmov.f32 s29, s0
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    mov r8, r0
+; BE-I32-NEXT:    vmov r0, s27
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r9, r0
+; BE-I32-NEXT:    vmov r0, s25
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r10, r0
+; BE-I32-NEXT:    vmov r0, s23
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r7, r0
+; BE-I32-NEXT:    vmov r0, s21
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r4, r0
+; BE-I32-NEXT:    vmov r0, s19
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r5, r0
+; BE-I32-NEXT:    vmov r0, s30
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    mov r6, r0
+; BE-I32-NEXT:    vmov r0, s17
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d8[0], r0
+; BE-I32-NEXT:    vmov r0, s29
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d10[0], r0
+; BE-I32-NEXT:    vmov r0, s28
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d11[0], r0
+; BE-I32-NEXT:    vmov r0, s26
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d14[0], r0
+; BE-I32-NEXT:    vmov r0, s24
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vldr s0, [sp] @ 4-byte Reload
+; BE-I32-NEXT:    vmov.32 d15[0], r0
+; BE-I32-NEXT:    vmov r0, s0
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vldr s0, [sp, #4] @ 4-byte Reload
+; BE-I32-NEXT:    vmov.32 d12[0], r0
+; BE-I32-NEXT:    vmov r0, s0
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vldr s0, [sp, #8] @ 4-byte Reload
+; BE-I32-NEXT:    vmov.32 d9[0], r0
+; BE-I32-NEXT:    vmov r0, s0
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r6
+; BE-I32-NEXT:    vmov.32 d13[0], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r5
+; BE-I32-NEXT:    vmov.32 d9[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r4
+; BE-I32-NEXT:    vmov.32 d13[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r7
+; BE-I32-NEXT:    vmov.32 d12[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r10
+; BE-I32-NEXT:    vmov.32 d15[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov s0, r9
+; BE-I32-NEXT:    vmov.32 d14[1], r0
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vldr s0, [sp, #12] @ 4-byte Reload
+; BE-I32-NEXT:    vmov.32 d11[1], r0
+; BE-I32-NEXT:    vmov r0, s0
+; BE-I32-NEXT:    bl __aeabi_h2f
+; BE-I32-NEXT:    vmov s0, r0
+; BE-I32-NEXT:    vmov.32 d10[1], r8
+; BE-I32-NEXT:    bl lrintf
+; BE-I32-NEXT:    vmov.32 d8[1], r0
+; BE-I32-NEXT:    vrev64.32 q0, q5
+; BE-I32-NEXT:    vrev64.32 q1, q7
+; BE-I32-NEXT:    vrev64.32 q2, q6
+; BE-I32-NEXT:    vrev64.32 q3, q4
+; BE-I32-NEXT:    add sp, sp, #16
+; BE-I32-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I64-LABEL: lrint_v16f16:
+; BE-I64:       @ %bb.0:
+; BE-I64-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT:    .pad #4
+; BE-I64-NEXT:    sub sp, sp, #4
+; BE-I64-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    .pad #112
+; BE-I64-NEXT:    sub sp, sp, #112
+; BE-I64-NEXT:    mov r11, r0
+; BE-I64-NEXT:    vmov r0, s14
+; BE-I64-NEXT:    vmov.f32 s17, s15
+; BE-I64-NEXT:    vstr s13, [sp, #52] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.f32 s21, s12
+; BE-I64-NEXT:    vstr s10, [sp, #68] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.f32 s23, s11
+; BE-I64-NEXT:    vstr s7, [sp, #72] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.f32 s19, s9
+; BE-I64-NEXT:    vstr s4, [sp, #28] @ 4-byte Spill
+; BE-I64-NEXT:    vmov.f32 s26, s8
+; BE-I64-NEXT:    vmov.f32 s24, s6
+; BE-I64-NEXT:    vmov.f32 s18, s5
+; BE-I64-NEXT:    vmov.f32 s25, s3
+; BE-I64-NEXT:    vmov.f32 s16, s2
+; BE-I64-NEXT:    vmov.f32 s27, s1
+; BE-I64-NEXT:    vmov.f32 s29, s0
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    mov r8, r0
+; BE-I64-NEXT:    vmov r0, s29
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r9, r0
+; BE-I64-NEXT:    vmov r0, s27
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r7, r0
+; BE-I64-NEXT:    vmov r0, s21
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r6, r0
+; BE-I64-NEXT:    vmov r0, s25
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r5, r0
+; BE-I64-NEXT:    vmov r0, s23
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov s0, r5
+; BE-I64-NEXT:    str r1, [sp, #108] @ 4-byte Spill
+; BE-I64-NEXT:    vstr d16, [sp, #96] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov s0, r6
+; BE-I64-NEXT:    str r1, [sp, #92] @ 4-byte Spill
+; BE-I64-NEXT:    vstr d16, [sp, #80] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov s0, r7
+; BE-I64-NEXT:    str r1, [sp, #76] @ 4-byte Spill
+; BE-I64-NEXT:    vstr d16, [sp, #56] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov s0, r9
+; BE-I64-NEXT:    mov r10, r1
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    vmov r0, s17
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    vmov.32 d10[0], r8
+; BE-I64-NEXT:    vmov r6, s19
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    mov r0, r6
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r6, r0
+; BE-I64-NEXT:    vmov r0, s18
+; BE-I64-NEXT:    vmov.32 d10[1], r4
+; BE-I64-NEXT:    vstr d10, [sp, #40] @ 8-byte Spill
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    mov r4, r0
+; BE-I64-NEXT:    vmov r0, s16
+; BE-I64-NEXT:    vmov.32 d11[1], r7
+; BE-I64-NEXT:    vstr d11, [sp, #32] @ 8-byte Spill
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov.32 d15[1], r5
+; BE-I64-NEXT:    vmov s0, r0
+; BE-I64-NEXT:    vstr d15, [sp, #16] @ 8-byte Spill
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vldr s0, [sp, #28] @ 4-byte Reload
+; BE-I64-NEXT:    vmov r5, s26
+; BE-I64-NEXT:    vmov.32 d16[0], r0
+; BE-I64-NEXT:    vmov s26, r4
+; BE-I64-NEXT:    vmov r0, s0
+; BE-I64-NEXT:    mov r8, r1
+; BE-I64-NEXT:    vmov.32 d14[1], r10
+; BE-I64-NEXT:    vmov r4, s24
+; BE-I64-NEXT:    vstr d16, [sp] @ 8-byte Spill
+; BE-I64-NEXT:    vstr d14, [sp, #8] @ 8-byte Spill
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov.f32 s0, s26
+; BE-I64-NEXT:    vmov s22, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s22
+; BE-I64-NEXT:    mov r7, r1
+; BE-I64-NEXT:    vmov.32 d13[0], r0
+; BE-I64-NEXT:    vmov s24, r6
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d14[0], r0
+; BE-I64-NEXT:    mov r0, r4
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov.f32 s0, s24
+; BE-I64-NEXT:    vmov s22, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s22
+; BE-I64-NEXT:    mov r9, r1
+; BE-I64-NEXT:    vmov.32 d12[0], r0
+; BE-I64-NEXT:    vmov.32 d14[1], r6
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d11[0], r0
+; BE-I64-NEXT:    mov r0, r5
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vldr s0, [sp, #52] @ 4-byte Reload
+; BE-I64-NEXT:    mov r4, r0
+; BE-I64-NEXT:    vmov.32 d13[1], r7
+; BE-I64-NEXT:    vmov r0, s0
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vldr s0, [sp, #68] @ 4-byte Reload
+; BE-I64-NEXT:    vmov s20, r0
+; BE-I64-NEXT:    vmov.32 d11[1], r6
+; BE-I64-NEXT:    vmov r7, s0
+; BE-I64-NEXT:    vldr s0, [sp, #72] @ 4-byte Reload
+; BE-I64-NEXT:    vmov r0, s0
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov.f32 s0, s20
+; BE-I64-NEXT:    vmov s16, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    mov r5, r1
+; BE-I64-NEXT:    vmov.32 d10[0], r0
+; BE-I64-NEXT:    vmov s18, r4
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d15[0], r0
+; BE-I64-NEXT:    mov r0, r7
+; BE-I64-NEXT:    mov r4, r1
+; BE-I64-NEXT:    bl __aeabi_h2f
+; BE-I64-NEXT:    vmov.f32 s0, s18
+; BE-I64-NEXT:    vmov s16, r0
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.f32 s0, s16
+; BE-I64-NEXT:    mov r6, r1
+; BE-I64-NEXT:    vmov.32 d9[0], r0
+; BE-I64-NEXT:    vmov.32 d15[1], r4
+; BE-I64-NEXT:    bl lrintf
+; BE-I64-NEXT:    vmov.32 d24[0], r0
+; BE-I64-NEXT:    ldr r0, [sp, #76] @ 4-byte Reload
+; BE-I64-NEXT:    vldr d23, [sp, #56] @ 8-byte Reload
+; BE-I64-NEXT:    vldr d20, [sp, #8] @ 8-byte Reload
+; BE-I64-NEXT:    vmov.32 d23[1], r0
+; BE-I64-NEXT:    ldr r0, [sp, #92] @ 4-byte Reload
+; BE-I64-NEXT:    vldr d22, [sp, #80] @ 8-byte Reload
+; BE-I64-NEXT:    vldr d26, [sp, #16] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d21, d20
+; BE-I64-NEXT:    vmov.32 d22[1], r0
+; BE-I64-NEXT:    ldr r0, [sp, #108] @ 4-byte Reload
+; BE-I64-NEXT:    vldr d30, [sp] @ 8-byte Reload
+; BE-I64-NEXT:    vldr d25, [sp, #96] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d20, d26
+; BE-I64-NEXT:    vldr d26, [sp, #32] @ 8-byte Reload
+; BE-I64-NEXT:    vmov.32 d10[1], r5
+; BE-I64-NEXT:    vmov.32 d12[1], r9
+; BE-I64-NEXT:    vldr d28, [sp, #40] @ 8-byte Reload
+; BE-I64-NEXT:    vrev64.32 d27, d26
+; BE-I64-NEXT:    vmov.32 d25[1], r0
+; BE-I64-NEXT:    add r0, r11, #64
+; BE-I64-NEXT:    vmov.32 d30[1], r8
+; BE-I64-NEXT:    vmov.32 d9[1], r6
+; BE-I64-NEXT:    vrev64.32 d26, d28
+; BE-I64-NEXT:    vrev64.32 d29, d10
+; BE-I64-NEXT:    vmov.32 d24[1], r1
+; BE-I64-NEXT:    vrev64.32 d1, d12
+; BE-I64-NEXT:    vrev64.32 d28, d23
+; BE-I64-NEXT:    vrev64.32 d23, d22
+; BE-I64-NEXT:    vrev64.32 d22, d30
+; BE-I64-NEXT:    vrev64.32 d31, d25
+; BE-I64-NEXT:    vrev64.32 d0, d9
+; BE-I64-NEXT:    vrev64.32 d30, d24
+; BE-I64-NEXT:    vst1.64 {d0, d1}, [r0:128]!
+; BE-I64-NEXT:    vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEXT:    vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEXT:    vrev64.32 d19, d13
+; BE-I64-NEXT:    vst1.64 {d26, d27}, [r0:128]
+; BE-I64-NEXT:    vst1.64 {d20, d21}, [r11:128]!
+; BE-I64-NEXT:    vrev64.32 d18, d14
+; BE-I64-NEXT:    vst1.64 {d22, d23}, [r11:128]!
+; BE-I64-NEXT:    vrev64.32 d17, d15
+; BE-I64-NEXT:    vrev64.32 d16, d11
+; BE-I64-NEXT:    vst1.64 {d18, d19}, [r11:128]!
+; BE-I64-NEXT:    vst1.64 {d16, d17}, [r11:128]
+; BE-I64-NEXT:    add sp, sp, #112
+; BE-I64-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT:    add sp, sp, #4
+; BE-I64-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
+  ret <16 x iXLen> %a
+}
 
 define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 ; LE-I32-LABEL: lrint_v1f32:

>From c0a41605d6eee890c35b3c8737e133157abafc88 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Thu, 2 Oct 2025 22:45:09 +0100
Subject: [PATCH 2/2] Make the fp16 run line use +fp-armv8

---
 llvm/test/CodeGen/ARM/lrint-conv.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
index 216488fe33313..9b471cc11d896 100644
--- a/llvm/test/CodeGen/ARM/lrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
 ; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
-; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
 
 define i32 @testmswh_builtin(half %x) {
 ; CHECK-SOFT-LABEL: testmswh_builtin:



More information about the llvm-commits mailing list