[llvm] [ARM][SDAG] Half promote llvm.lrint nodes. (PR #161088)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 28 10:14:53 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-selectiondag
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
As shown in #<!-- -->137101, fp16 lrint are not handled correctly on Arm. This adds soft-half promotion for them, reusing the function that promotes a value with operands (and can handle strict fp once that is added).
---
Patch is 55.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/161088.diff
6 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp (+5-2)
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (+1-1)
- (modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (+1)
- (modified) llvm/test/CodeGen/ARM/llrint-conv.ll (+47-22)
- (modified) llvm/test/CodeGen/ARM/lrint-conv.ll (+45-24)
- (modified) llvm/test/CodeGen/ARM/vector-lrint.ll (+1286-27)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 83bb1dfe86c6a..1737a93837852 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -3740,7 +3740,10 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break;
+ case ISD::FP_TO_UINT:
+ case ISD::LRINT:
+ Res = SoftPromoteHalfOp_Op0WithStrict(N);
+ break;
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
Res = SoftPromoteHalfOp_FP_TO_XINT_SAT(N); break;
@@ -3819,7 +3822,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), RVT, Op);
}
-SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftPromoteHalfOp_Op0WithStrict(SDNode *N) {
EVT RVT = N->getValueType(0);
bool IsStrict = N->isStrictFPOpcode();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 586c3411791f9..d580ce0026e69 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -843,7 +843,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftPromoteHalfOp_FAKE_USE(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo);
SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N);
- SDValue SoftPromoteHalfOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftPromoteHalfOp_Op0WithStrict(SDNode *N);
SDValue SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N);
SDValue SoftPromoteHalfOp_SETCC(SDNode *N);
SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index f4ac6bb76b3fe..2a40fb9b476f8 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1353,6 +1353,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FLOG, MVT::f16, Promote);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+ setOperationAction(ISD::LRINT, MVT::f16, Expand);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
diff --git a/llvm/test/CodeGen/ARM/llrint-conv.ll b/llvm/test/CodeGen/ARM/llrint-conv.ll
index 749ee00a3c68e..a1a04db8622c7 100644
--- a/llvm/test/CodeGen/ARM/llrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/llrint-conv.ll
@@ -1,46 +1,71 @@
-; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
-; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
+; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
+; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
-; SOFTFP-LABEL: testmsxh_builtin:
-; SOFTFP: bl llrintf
-; HARDFP-LABEL: testmsxh_builtin:
-; HARDFP: bl llrintf
define i64 @testmsxh_builtin(half %x) {
+; CHECK-SOFT-LABEL: testmsxh_builtin:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: .save {r11, lr}
+; CHECK-SOFT-NEXT: push {r11, lr}
+; CHECK-SOFT-NEXT: bl __aeabi_h2f
+; CHECK-SOFT-NEXT: bl llrintf
+; CHECK-SOFT-NEXT: pop {r11, pc}
+;
+; CHECK-NOFP16-LABEL: testmsxh_builtin:
+; CHECK-NOFP16: @ %bb.0: @ %entry
+; CHECK-NOFP16-NEXT: .save {r11, lr}
+; CHECK-NOFP16-NEXT: push {r11, lr}
+; CHECK-NOFP16-NEXT: vmov r0, s0
+; CHECK-NOFP16-NEXT: bl __aeabi_h2f
+; CHECK-NOFP16-NEXT: vmov s0, r0
+; CHECK-NOFP16-NEXT: bl llrintf
+; CHECK-NOFP16-NEXT: pop {r11, pc}
+;
+; CHECK-FP16-LABEL: testmsxh_builtin:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: .save {r11, lr}
+; CHECK-FP16-NEXT: push {r11, lr}
+; CHECK-FP16-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-FP16-NEXT: bl llrintf
+; CHECK-FP16-NEXT: pop {r11, pc}
entry:
%0 = tail call i64 @llvm.llrint.i64.f16(half %x)
ret i64 %0
}
-; SOFTFP-LABEL: testmsxs_builtin:
-; SOFTFP: bl llrintf
-; HARDFP-LABEL: testmsxs_builtin:
-; HARDFP: bl llrintf
define i64 @testmsxs_builtin(float %x) {
+; CHECK-LABEL: testmsxs_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl llrintf
+; CHECK-NEXT: pop {r11, pc}
entry:
%0 = tail call i64 @llvm.llrint.i64.f32(float %x)
ret i64 %0
}
-; SOFTFP-LABEL: testmsxd_builtin:
-; SOFTFP: bl llrint
-; HARDFP-LABEL: testmsxd_builtin:
-; HARDFP: bl llrint
define i64 @testmsxd_builtin(double %x) {
+; CHECK-LABEL: testmsxd_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl llrint
+; CHECK-NEXT: pop {r11, pc}
entry:
%0 = tail call i64 @llvm.llrint.i64.f64(double %x)
ret i64 %0
}
-; FIXME(#44744): incorrect libcall
-; SOFTFP-LABEL: testmsxq_builtin:
-; SOFTFP: bl llrintl
-; HARDFP-LABEL: testmsxq_builtin:
-; HARDFP: bl llrintl
define i64 @testmsxq_builtin(fp128 %x) {
+; CHECK-LABEL: testmsxq_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl llrintl
+; CHECK-NEXT: pop {r11, pc}
entry:
%0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x)
ret i64 %0
}
-
-declare i64 @llvm.llrint.i64.f32(float) nounwind readnone
-declare i64 @llvm.llrint.i64.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
index 9aa95112af533..216488fe33313 100644
--- a/llvm/test/CodeGen/ARM/lrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -1,43 +1,64 @@
-; RUN: llc < %s -mtriple=arm-eabi -float-abi=soft | FileCheck %s --check-prefix=SOFTFP
-; RUN: llc < %s -mtriple=arm-eabi -float-abi=hard | FileCheck %s --check-prefix=HARDFP
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
+; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
+; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
-; FIXME: crash
-; define i32 @testmswh_builtin(half %x) {
-; entry:
-; %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
-; ret i32 %0
-; }
+define i32 @testmswh_builtin(half %x) {
+; CHECK-SOFT-LABEL: testmswh_builtin:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: .save {r11, lr}
+; CHECK-SOFT-NEXT: push {r11, lr}
+; CHECK-SOFT-NEXT: bl __aeabi_h2f
+; CHECK-SOFT-NEXT: pop {r11, lr}
+; CHECK-SOFT-NEXT: b lrintf
+;
+; CHECK-NOFP16-LABEL: testmswh_builtin:
+; CHECK-NOFP16: @ %bb.0: @ %entry
+; CHECK-NOFP16-NEXT: .save {r11, lr}
+; CHECK-NOFP16-NEXT: push {r11, lr}
+; CHECK-NOFP16-NEXT: vmov r0, s0
+; CHECK-NOFP16-NEXT: bl __aeabi_h2f
+; CHECK-NOFP16-NEXT: vmov s0, r0
+; CHECK-NOFP16-NEXT: pop {r11, lr}
+; CHECK-NOFP16-NEXT: b lrintf
+;
+; CHECK-FP16-LABEL: testmswh_builtin:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vrintx.f16 s0, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s0
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+ ret i32 %0
+}
-; SOFTFP-LABEL: testmsws_builtin:
-; SOFTFP: bl lrintf
-; HARDFP-LABEL: testmsws_builtin:
-; HARDFP: bl lrintf
define i32 @testmsws_builtin(float %x) {
+; CHECK-LABEL: testmsws_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: b lrintf
entry:
%0 = tail call i32 @llvm.lrint.i32.f32(float %x)
ret i32 %0
}
-; SOFTFP-LABEL: testmswd_builtin:
-; SOFTFP: bl lrint
-; HARDFP-LABEL: testmswd_builtin:
-; HARDFP: bl lrint
define i32 @testmswd_builtin(double %x) {
+; CHECK-LABEL: testmswd_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: b lrint
entry:
%0 = tail call i32 @llvm.lrint.i32.f64(double %x)
ret i32 %0
}
-; FIXME(#44744): incorrect libcall
-; SOFTFP-LABEL: testmswq_builtin:
-; SOFTFP: bl lrintl
-; HARDFP-LABEL: testmswq_builtin:
-; HARDFP: bl lrintl
define i32 @testmswq_builtin(fp128 %x) {
+; CHECK-LABEL: testmswq_builtin:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: bl lrintl
+; CHECK-NEXT: pop {r11, pc}
entry:
%0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x)
ret i32 %0
}
-
-declare i32 @llvm.lrint.i32.f32(float) nounwind readnone
-declare i32 @llvm.lrint.i32.f64(double) nounwind readnone
diff --git a/llvm/test/CodeGen/ARM/vector-lrint.ll b/llvm/test/CodeGen/ARM/vector-lrint.ll
index fe5e3cbcdf771..a927def8a8cad 100644
--- a/llvm/test/CodeGen/ARM/vector-lrint.ll
+++ b/llvm/test/CodeGen/ARM/vector-lrint.ll
@@ -9,36 +9,1295 @@
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I64
-; FIXME: crash "Do not know how to soft promote this operator's operand!"
-; define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
-; %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
-; ret <1 x iXLen> %a
-; }
-; declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
-
-; define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
-; %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
-; ret <2 x iXLen> %a
-; }
-; declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
+define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+; LE-I32-LABEL: lrint_v1f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_f2h
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v1f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r11, lr}
+; LE-I64-NEXT: push {r11, lr}
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_f2h
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d0[0], r0
+; LE-I64-NEXT: vmov.32 d0[1], r1
+; LE-I64-NEXT: pop {r11, pc}
+;
+; BE-I32-LABEL: lrint_v1f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_f2h
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v1f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r11, lr}
+; BE-I64-NEXT: push {r11, lr}
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_f2h
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d0, d16
+; BE-I64-NEXT: pop {r11, pc}
+ %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
+ ret <1 x iXLen> %a
+}
+declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
-; define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
-; %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
-; ret <4 x iXLen> %a
-; }
-; declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half>)
+define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+; LE-I32-LABEL: lrint_v2f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: .vsave {d8}
+; LE-I32-NEXT: vpush {d8}
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: vmov.f32 s16, s1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov r1, s16
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: mov r0, r1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: vorr d0, d8, d8
+; LE-I32-NEXT: vpop {d8}
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v2f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r11, lr}
+; LE-I64-NEXT: .vsave {d8, d9}
+; LE-I64-NEXT: vpush {d8, d9}
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vmov.f32 s16, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d9[0], r4
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov.32 d9[1], r5
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vorr q0, q4, q4
+; LE-I64-NEXT: vpop {d8, d9}
+; LE-I64-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-I32-LABEL: lrint_v2f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: .vsave {d8}
+; BE-I32-NEXT: vpush {d8}
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: vmov.f32 s16, s1
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov r1, s16
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: mov r0, r1
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: vrev64.32 d0, d8
+; BE-I32-NEXT: vpop {d8}
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v2f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r11, lr}
+; BE-I64-NEXT: .vsave {d8}
+; BE-I64-NEXT: vpush {d8}
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s16, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d8[0], r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d8[1], r5
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d8
+; BE-I64-NEXT: vrev64.32 d0, d16
+; BE-I64-NEXT: vpop {d8}
+; BE-I64-NEXT: pop {r4, r5, r11, pc}
+ %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
+ ret <2 x iXLen> %a
+}
+declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
-; define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
-; %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
-; ret <8 x iXLen> %a
-; }
-; declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half>)
+define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+; LE-I32-LABEL: lrint_v4f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r11, lr}
+; LE-I32-NEXT: push {r4, r5, r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11}
+; LE-I32-NEXT: vmov r0, s3
+; LE-I32-NEXT: vmov.f32 s16, s2
+; LE-I32-NEXT: vmov.f32 s18, s1
+; LE-I32-NEXT: vmov.f32 s20, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r4
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q5, q5
+; LE-I32-NEXT: vpop {d8, d9, d10, d11}
+; LE-I32-NEXT: pop {r4, r5, r11, pc}
+;
+; LE-I64-LABEL: lrint_v4f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT: .vsave {d12, d13}
+; LE-I64-NEXT: vpush {d12, d13}
+; LE-I64-NEXT: .vsave {d8, d9, d10}
+; LE-I64-NEXT: vpush {d8, d9, d10}
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vmov.f32 s16, s3
+; LE-I64-NEXT: vmov.f32 s20, s2
+; LE-I64-NEXT: vmov.f32 s18, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: vmov r0, s20
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d13[0], r5
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov.32 d13[1], r4
+; LE-I64-NEXT: vmov.32 d9[1], r6
+; LE-I64-NEXT: vmov.32 d12[1], r7
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vorr q0, q6, q6
+; LE-I64-NEXT: vorr q1, q4, q4
+; LE-I64-NEXT: vpop {d8, d9, d10}
+; LE-I64-NEXT: vpop {d12, d13}
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; BE-I32-LABEL: lrint_v4f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r11, lr}
+; BE-I32-NEXT: push {r4, r5, r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11}
+; BE-I32-NEXT: vmov r0, s3
+; BE-I32-NEXT: vmov.f32 s16, s2
+; BE-I32-NEXT: vmov.f32 s18, s1
+; BE-I32-NEXT: vmov.f32 s20, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s16
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s20
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: vmov r0, s18
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d11[1], r4
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q5
+; BE-I32-NEXT: vpop {d8, d9, d10, d11}
+; BE-I32-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-I64-LABEL: lrint_v4f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT: .vsave {d8, d9, d10}
+; BE-I64-NEXT: vpush {d8, d9, d10}
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s16, s3
+; BE-I64-NEXT: vmov.f32 s18, s2
+; BE-I64-NEXT: vmov.f32 s20, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s20
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/161088
More information about the llvm-commits
mailing list