[llvm] e020f46 - [ARM] Fix BF16 lowering with FullFP16
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 19 02:20:40 PST 2024
Author: David Green
Date: 2024-12-19T10:20:35Z
New Revision: e020f460275aab9053d9e090d0b777b40da14a81
URL: https://github.com/llvm/llvm-project/commit/e020f460275aab9053d9e090d0b777b40da14a81
DIFF: https://github.com/llvm/llvm-project/commit/e020f460275aab9053d9e090d0b777b40da14a81.diff
LOG: [ARM] Fix BF16 lowering with FullFP16
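This adds test coverage for bf16 instructions, making sure that lowering bf16
works with and without +fullfp16. The fix itself marks BF16_TO_FP (f32) as
Expand and FP_TO_BF16 (f32) as Custom in the new else branch, lowers
FP_TO_BF16 to an FPROUND libcall in the new LowerFP_TO_BF16, and bitcasts the
operand to f16 in ExpandBITCAST when the subtarget has FullFP16 but not BF16.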
Added:
llvm/test/CodeGen/Thumb2/bf16-instructions.ll
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMISelLowering.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 88293c1b1101ac..860d13f3d12175 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -804,6 +804,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setAllExpand(MVT::bf16);
if (!Subtarget->hasFullFP16())
setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
+ } else {
+ setOperationAction(ISD::BF16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::FP_TO_BF16, MVT::f32, Custom);
}
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
@@ -6301,10 +6304,13 @@ SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
- (SrcVT == MVT::f16 || SrcVT == MVT::bf16))
+ (SrcVT == MVT::f16 || SrcVT == MVT::bf16)) {
+ if (Subtarget->hasFullFP16() && !Subtarget->hasBF16())
+ Op = DAG.getBitcast(MVT::f16, Op);
return DAG.getNode(
ISD::TRUNCATE, SDLoc(N), DstVT,
MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
+ }
if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
return SDValue();
@@ -10588,6 +10594,17 @@ SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
return DAG.getFrameIndex(FI, VT);
}
+SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MakeLibCallOptions CallOptions;
+ MVT SVT = Op.getOperand(0).getSimpleValueType();
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, MVT::bf16);
+ SDValue Res =
+ makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
+ return DAG.getBitcast(MVT::i32, Res);
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
switch (Op.getOpcode()) {
@@ -10713,6 +10730,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
case ISD::SPONENTRY:
return LowerSPONENTRY(Op, DAG);
+ case ISD::FP_TO_BF16:
+ return LowerFP_TO_BF16(Op, DAG);
case ARMISD::WIN__DBZCHK: return SDValue();
}
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 4fa600e0cfcc40..49416e2c8b25e1 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -886,6 +886,7 @@ class VectorType;
SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
diff --git a/llvm/test/CodeGen/Thumb2/bf16-instructions.ll b/llvm/test/CodeGen/Thumb2/bf16-instructions.ll
new file mode 100644
index 00000000000000..11c9c6028d342e
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/bf16-instructions.ll
@@ -0,0 +1,2336 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple thumbv8.1m.main-none-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP
+; RUN: llc < %s -mtriple thumbv8.1m.main-none-eabihf -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP
+
+define bfloat @test_fadd(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_fadd:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fadd
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fadd:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vadd.f32 s0, s2, s0
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = fadd bfloat %a, %b
+ ret bfloat %r
+}
+
+define bfloat @test_fsub(bfloat %a, bfloat %b) {
+;
+; CHECK-NOFP-LABEL: test_fsub:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fsub
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fsub:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vsub.f32 s0, s2, s0
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = fsub bfloat %a, %b
+ ret bfloat %r
+}
+
+define bfloat @test_fmul(bfloat %a, bfloat %b) {
+;
+; CHECK-NOFP-LABEL: test_fmul:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fmul
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fmul:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vmul.f32 s0, s2, s0
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = fmul bfloat %a, %b
+ ret bfloat %r
+}
+
+define bfloat @test_fmadd(bfloat %a, bfloat %b, bfloat %c) {
+;
+; CHECK-NOFP-LABEL: test_fmadd:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r4, lr}
+; CHECK-NOFP-NEXT: push {r4, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: mov r4, r2
+; CHECK-NOFP-NEXT: bl __aeabi_fmul
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r4, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fadd
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r4, pc}
+;
+; CHECK-FP-LABEL: test_fmadd:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: .vsave {d8}
+; CHECK-FP-NEXT: vpush {d8}
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: vmov.f32 s16, s2
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vmul.f32 s0, s2, s0
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov r0, s16
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vadd.f32 s0, s2, s0
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: vpop {d8}
+; CHECK-FP-NEXT: pop {r7, pc}
+ %mul = fmul fast bfloat %a, %b
+ %r = fadd fast bfloat %mul, %c
+ ret bfloat %r
+}
+
+define bfloat @test_fdiv(bfloat %a, bfloat %b) {
+;
+; CHECK-NOFP-LABEL: test_fdiv:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fdiv
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fdiv:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vdiv.f32 s0, s2, s0
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = fdiv bfloat %a, %b
+ ret bfloat %r
+}
+
+define bfloat @test_frem(bfloat %a, bfloat %b) {
+;
+; CHECK-NOFP-LABEL: test_frem:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl fmodf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_frem:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: lsls r1, r1, #16
+; CHECK-FP-NEXT: vmov s1, r0
+; CHECK-FP-NEXT: vmov s0, r1
+; CHECK-FP-NEXT: bl fmodf
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = frem bfloat %a, %b
+ ret bfloat %r
+}
+
+define void @test_store(bfloat %a, ptr %b) {
+; CHECK-NOFP-LABEL: test_store:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: strh r0, [r1]
+; CHECK-NOFP-NEXT: bx lr
+;
+; CHECK-FP-LABEL: test_store:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: strh r1, [r0]
+; CHECK-FP-NEXT: bx lr
+ store bfloat %a, ptr %b
+ ret void
+}
+
+define bfloat @test_load(ptr %a) {
+; CHECK-NOFP-LABEL: test_load:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: ldrh r0, [r0]
+; CHECK-NOFP-NEXT: bx lr
+;
+; CHECK-FP-LABEL: test_load:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: ldrh r0, [r0]
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bx lr
+ %r = load bfloat, ptr %a
+ ret bfloat %r
+}
+
+declare bfloat @test_callee(bfloat %a, bfloat %b)
+
+define bfloat @test_call(bfloat %a, bfloat %b) {
+; CHECK-LABEL: test_call:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r7, lr}
+; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: bl test_callee
+; CHECK-NEXT: pop {r7, pc}
+ %r = call bfloat @test_callee(bfloat %a, bfloat %b)
+ ret bfloat %r
+}
+
+define void @test_call_store(bfloat %a, bfloat %b, ptr %p) {
+; CHECK-NOFP-LABEL: test_call_store:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r4, lr}
+; CHECK-NOFP-NEXT: push {r4, lr}
+; CHECK-NOFP-NEXT: mov r4, r2
+; CHECK-NOFP-NEXT: bl test_callee
+; CHECK-NOFP-NEXT: strh r0, [r4]
+; CHECK-NOFP-NEXT: pop {r4, pc}
+;
+; CHECK-FP-LABEL: test_call_store:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r4, lr}
+; CHECK-FP-NEXT: push {r4, lr}
+; CHECK-FP-NEXT: mov r4, r0
+; CHECK-FP-NEXT: bl test_callee
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: strh r0, [r4]
+; CHECK-FP-NEXT: pop {r4, pc}
+ %r = call bfloat @test_callee(bfloat %a, bfloat %b)
+ store bfloat %r, ptr %p
+ ret void
+}
+
+define bfloat @test_call_flipped(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_call_flipped:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: mov r2, r0
+; CHECK-NOFP-NEXT: mov r0, r1
+; CHECK-NOFP-NEXT: mov r1, r2
+; CHECK-NOFP-NEXT: bl test_callee
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_call_flipped:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov.f32 s2, s0
+; CHECK-FP-NEXT: vmov.f32 s0, s1
+; CHECK-FP-NEXT: vmov.f32 s1, s2
+; CHECK-FP-NEXT: bl test_callee
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @test_callee(bfloat %b, bfloat %a)
+ ret bfloat %r
+}
+
+define bfloat @test_tailcall_flipped(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_tailcall_flipped:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: mov r2, r0
+; CHECK-NOFP-NEXT: mov r0, r1
+; CHECK-NOFP-NEXT: mov r1, r2
+; CHECK-NOFP-NEXT: b test_callee
+;
+; CHECK-FP-LABEL: test_tailcall_flipped:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov.f32 s2, s0
+; CHECK-FP-NEXT: vmov.f32 s0, s1
+; CHECK-FP-NEXT: vmov.f32 s1, s2
+; CHECK-FP-NEXT: b test_callee
+ %r = tail call bfloat @test_callee(bfloat %b, bfloat %a)
+ ret bfloat %r
+}
+
+define bfloat @test_select(bfloat %a, bfloat %b, i1 zeroext %c) {
+; CHECK-NOFP-LABEL: test_select:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: cmp r2, #0
+; CHECK-NOFP-NEXT: csel r0, r0, r1, ne
+; CHECK-NOFP-NEXT: bx lr
+;
+; CHECK-FP-LABEL: test_select:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r1, s1
+; CHECK-FP-NEXT: cmp r0, #0
+; CHECK-FP-NEXT: it ne
+; CHECK-FP-NEXT: vmovne r1, s0
+; CHECK-FP-NEXT: vmov.f16 s0, r1
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bx lr
+ %r = select i1 %c, bfloat %a, bfloat %b
+ ret bfloat %r
+}
+
+define bfloat @test_select_cc(bfloat %a, bfloat %b, bfloat %c, bfloat %d) {
+; CHECK-NOFP-LABEL: test_select_cc:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r4, r5, r7, lr}
+; CHECK-NOFP-NEXT: push {r4, r5, r7, lr}
+; CHECK-NOFP-NEXT: mov r4, r1
+; CHECK-NOFP-NEXT: mov r5, r0
+; CHECK-NOFP-NEXT: lsls r0, r2, #16
+; CHECK-NOFP-NEXT: lsls r1, r3, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpeq
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: csel r0, r5, r4, eq
+; CHECK-NOFP-NEXT: pop {r4, r5, r7, pc}
+;
+; CHECK-FP-LABEL: test_select_cc:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s3
+; CHECK-FP-NEXT: vmov r1, s2
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s4, r0
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vcmp.f32 s4, s2
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: it ne
+; CHECK-FP-NEXT: vmovne r0, s0
+; CHECK-FP-NEXT: vmov.f16 s0, r0
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bx lr
+ %cc = fcmp une bfloat %c, %d
+ %r = select i1 %cc, bfloat %a, bfloat %b
+ ret bfloat %r
+}
+
+define float @test_select_cc_f32_f16(float %a, float %b, bfloat %c, bfloat %d) {
+; CHECK-NOFP-LABEL: test_select_cc_f32_f16:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r4, r5, r7, lr}
+; CHECK-NOFP-NEXT: push {r4, r5, r7, lr}
+; CHECK-NOFP-NEXT: mov r4, r1
+; CHECK-NOFP-NEXT: mov r5, r0
+; CHECK-NOFP-NEXT: lsls r0, r2, #16
+; CHECK-NOFP-NEXT: lsls r1, r3, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpeq
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: csel r0, r5, r4, eq
+; CHECK-NOFP-NEXT: pop {r4, r5, r7, pc}
+;
+; CHECK-FP-LABEL: test_select_cc_f32_f16:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s3
+; CHECK-FP-NEXT: vmov r1, s2
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s4, r0
+; CHECK-FP-NEXT: vcmp.f32 s4, s2
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: vseleq.f32 s0, s1, s0
+; CHECK-FP-NEXT: bx lr
+ %cc = fcmp une bfloat %c, %d
+ %r = select i1 %cc, float %a, float %b
+ ret float %r
+}
+
+define bfloat @test_select_cc_f16_f32(bfloat %a, bfloat %b, float %c, float %d) {
+; CHECK-NOFP-LABEL: test_select_cc_f16_f32:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r4, r5, r7, lr}
+; CHECK-NOFP-NEXT: push {r4, r5, r7, lr}
+; CHECK-NOFP-NEXT: mov r4, r1
+; CHECK-NOFP-NEXT: mov r5, r0
+; CHECK-NOFP-NEXT: mov r0, r2
+; CHECK-NOFP-NEXT: mov r1, r3
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpeq
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: csel r0, r5, r4, eq
+; CHECK-NOFP-NEXT: pop {r4, r5, r7, pc}
+;
+; CHECK-FP-LABEL: test_select_cc_f16_f32:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vcmp.f32 s2, s3
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: it ne
+; CHECK-FP-NEXT: vmovne r0, s0
+; CHECK-FP-NEXT: vmov.f16 s0, r0
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bx lr
+ %cc = fcmp une float %c, %d
+ %r = select i1 %cc, bfloat %a, bfloat %b
+ ret bfloat %r
+}
+
+define i1 @test_fcmp_une(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_fcmp_une:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpeq
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: cset r0, eq
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fcmp_une:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: cset r0, ne
+; CHECK-FP-NEXT: bx lr
+ %r = fcmp une bfloat %a, %b
+ ret i1 %r
+}
+
+define i1 @test_fcmp_ueq(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_fcmp_ueq:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NOFP-NEXT: push {r4, r5, r6, lr}
+; CHECK-NOFP-NEXT: lsls r4, r0, #16
+; CHECK-NOFP-NEXT: lsls r5, r1, #16
+; CHECK-NOFP-NEXT: mov r0, r4
+; CHECK-NOFP-NEXT: mov r1, r5
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpeq
+; CHECK-NOFP-NEXT: mov r6, r0
+; CHECK-NOFP-NEXT: mov r0, r4
+; CHECK-NOFP-NEXT: mov r1, r5
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpun
+; CHECK-NOFP-NEXT: orrs r0, r6
+; CHECK-NOFP-NEXT: cset r0, ne
+; CHECK-NOFP-NEXT: pop {r4, r5, r6, pc}
+;
+; CHECK-FP-LABEL: test_fcmp_ueq:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: cset r0, eq
+; CHECK-FP-NEXT: csinc r0, r0, zr, vc
+; CHECK-FP-NEXT: bx lr
+ %r = fcmp ueq bfloat %a, %b
+ ret i1 %r
+}
+
+define i1 @test_fcmp_ugt(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_fcmp_ugt:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fcmple
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: cset r0, eq
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fcmp_ugt:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: cset r0, hi
+; CHECK-FP-NEXT: bx lr
+ %r = fcmp ugt bfloat %a, %b
+ ret i1 %r
+}
+
+define i1 @test_fcmp_uge(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_fcmp_uge:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fcmplt
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: cset r0, eq
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fcmp_uge:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: cset r0, pl
+; CHECK-FP-NEXT: bx lr
+ %r = fcmp uge bfloat %a, %b
+ ret i1 %r
+}
+
+define i1 @test_fcmp_ult(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_fcmp_ult:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpge
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: cset r0, eq
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fcmp_ult:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: cset r0, lt
+; CHECK-FP-NEXT: bx lr
+ %r = fcmp ult bfloat %a, %b
+ ret i1 %r
+}
+
+define i1 @test_fcmp_ule(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_fcmp_ule:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpgt
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: cset r0, eq
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fcmp_ule:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: cset r0, le
+; CHECK-FP-NEXT: bx lr
+ %r = fcmp ule bfloat %a, %b
+ ret i1 %r
+}
+
+define i1 @test_fcmp_uno(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_fcmp_uno:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpun
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: cset r0, ne
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fcmp_uno:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: cset r0, vs
+; CHECK-FP-NEXT: bx lr
+ %r = fcmp uno bfloat %a, %b
+ ret i1 %r
+}
+
+define i1 @test_fcmp_one(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_fcmp_one:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NOFP-NEXT: push {r4, r5, r6, lr}
+; CHECK-NOFP-NEXT: lsls r4, r0, #16
+; CHECK-NOFP-NEXT: lsls r5, r1, #16
+; CHECK-NOFP-NEXT: mov r0, r4
+; CHECK-NOFP-NEXT: mov r1, r5
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpeq
+; CHECK-NOFP-NEXT: mov r6, r0
+; CHECK-NOFP-NEXT: mov r0, r4
+; CHECK-NOFP-NEXT: mov r1, r5
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpun
+; CHECK-NOFP-NEXT: orrs r0, r6
+; CHECK-NOFP-NEXT: cset r0, eq
+; CHECK-NOFP-NEXT: pop {r4, r5, r6, pc}
+;
+; CHECK-FP-LABEL: test_fcmp_one:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: cset r0, mi
+; CHECK-FP-NEXT: csinc r0, r0, zr, le
+; CHECK-FP-NEXT: bx lr
+ %r = fcmp one bfloat %a, %b
+ ret i1 %r
+}
+
+define i1 @test_fcmp_oeq(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_fcmp_oeq:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpeq
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: cset r0, ne
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fcmp_oeq:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: cset r0, eq
+; CHECK-FP-NEXT: bx lr
+ %r = fcmp oeq bfloat %a, %b
+ ret i1 %r
+}
+
+define i1 @test_fcmp_ogt(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_fcmp_ogt:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpgt
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: cset r0, ne
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fcmp_ogt:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: cset r0, gt
+; CHECK-FP-NEXT: bx lr
+ %r = fcmp ogt bfloat %a, %b
+ ret i1 %r
+}
+
+define i1 @test_fcmp_oge(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_fcmp_oge:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpge
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: cset r0, ne
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fcmp_oge:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: cset r0, ge
+; CHECK-FP-NEXT: bx lr
+ %r = fcmp oge bfloat %a, %b
+ ret i1 %r
+}
+
+define i1 @test_fcmp_olt(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_fcmp_olt:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fcmplt
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: cset r0, ne
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fcmp_olt:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: cset r0, mi
+; CHECK-FP-NEXT: bx lr
+ %r = fcmp olt bfloat %a, %b
+ ret i1 %r
+}
+
+define i1 @test_fcmp_ole(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_fcmp_ole:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fcmple
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: cset r0, ne
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fcmp_ole:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: cset r0, ls
+; CHECK-FP-NEXT: bx lr
+ %r = fcmp ole bfloat %a, %b
+ ret i1 %r
+}
+
+define i1 @test_fcmp_ord(bfloat %a, bfloat %b) {
+; CHECK-NOFP-LABEL: test_fcmp_ord:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpun
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: cset r0, eq
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fcmp_ord:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: cset r0, vc
+; CHECK-FP-NEXT: bx lr
+ %r = fcmp ord bfloat %a, %b
+ ret i1 %r
+}
+
+define void @test_fccmp(bfloat %in, ptr %out) {
+; CHECK-NOFP-LABEL: test_fccmp:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r4, r5, r6, r7, lr}
+; CHECK-NOFP-NEXT: push {r4, r5, r6, r7, lr}
+; CHECK-NOFP-NEXT: .pad #4
+; CHECK-NOFP-NEXT: sub sp, #4
+; CHECK-NOFP-NEXT: lsls r6, r0, #16
+; CHECK-NOFP-NEXT: mov r4, r1
+; CHECK-NOFP-NEXT: mov r5, r0
+; CHECK-NOFP-NEXT: mov r0, r6
+; CHECK-NOFP-NEXT: mov.w r1, #1207959552
+; CHECK-NOFP-NEXT: bl __aeabi_fcmpgt
+; CHECK-NOFP-NEXT: mov r7, r0
+; CHECK-NOFP-NEXT: mov r0, r6
+; CHECK-NOFP-NEXT: mov.w r1, #1157627904
+; CHECK-NOFP-NEXT: bl __aeabi_fcmplt
+; CHECK-NOFP-NEXT: mov.w r1, #17664
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: csel r0, r5, r1, ne
+; CHECK-NOFP-NEXT: cmp r7, #0
+; CHECK-NOFP-NEXT: csel r0, r0, r1, ne
+; CHECK-NOFP-NEXT: strh r0, [r4]
+; CHECK-NOFP-NEXT: add sp, #4
+; CHECK-NOFP-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; CHECK-FP-LABEL: test_fccmp:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: vldr s0, .LCPI30_0
+; CHECK-FP-NEXT: vldr s4, .LCPI30_1
+; CHECK-FP-NEXT: lsls r2, r1, #16
+; CHECK-FP-NEXT: vmov s2, r2
+; CHECK-FP-NEXT: mov.w r2, #17664
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: vcmp.f32 s2, s4
+; CHECK-FP-NEXT: csel r1, r1, r2, mi
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: csel r1, r1, r2, gt
+; CHECK-FP-NEXT: strh r1, [r0]
+; CHECK-FP-NEXT: bx lr
+; CHECK-FP-NEXT: .p2align 2
+; CHECK-FP-NEXT: @ %bb.1:
+; CHECK-FP-NEXT: .LCPI30_0:
+; CHECK-FP-NEXT: .long 0x45000000 @ float 2048
+; CHECK-FP-NEXT: .LCPI30_1:
+; CHECK-FP-NEXT: .long 0x48000000 @ float 131072
+ %cmp1 = fcmp ogt bfloat %in, 0xR4800
+ %cmp2 = fcmp olt bfloat %in, 0xR4500
+ %cond = and i1 %cmp1, %cmp2
+ %result = select i1 %cond, bfloat %in, bfloat 0xR4500
+ store bfloat %result, ptr %out
+ ret void
+}
+
+define void @test_br_cc(bfloat %a, bfloat %b, ptr %p1, ptr %p2) {
+; CHECK-NOFP-LABEL: test_br_cc:
+; CHECK-NOFP: @ %bb.0: @ %common.ret
+; CHECK-NOFP-NEXT: .save {r4, r5, r7, lr}
+; CHECK-NOFP-NEXT: push {r4, r5, r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: mov r4, r3
+; CHECK-NOFP-NEXT: mov r5, r2
+; CHECK-NOFP-NEXT: bl __aeabi_fcmplt
+; CHECK-NOFP-NEXT: cmp r0, #0
+; CHECK-NOFP-NEXT: mov.w r1, #0
+; CHECK-NOFP-NEXT: csel r0, r5, r4, eq
+; CHECK-NOFP-NEXT: str r1, [r0]
+; CHECK-NOFP-NEXT: pop {r4, r5, r7, pc}
+;
+; CHECK-FP-LABEL: test_br_cc:
+; CHECK-FP: @ %bb.0: @ %common.ret
+; CHECK-FP-NEXT: vmov r2, s1
+; CHECK-FP-NEXT: vmov r3, s0
+; CHECK-FP-NEXT: lsls r2, r2, #16
+; CHECK-FP-NEXT: vmov s0, r2
+; CHECK-FP-NEXT: lsls r2, r3, #16
+; CHECK-FP-NEXT: vmov s2, r2
+; CHECK-FP-NEXT: vcmp.f32 s2, s0
+; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-FP-NEXT: csel r0, r0, r1, pl
+; CHECK-FP-NEXT: movs r1, #0
+; CHECK-FP-NEXT: str r1, [r0]
+; CHECK-FP-NEXT: bx lr
+ %c = fcmp uge bfloat %a, %b
+ br i1 %c, label %then, label %else
+then:
+ store i32 0, ptr %p1
+ ret void
+else:
+ store i32 0, ptr %p2
+ ret void
+}
+
+; Loop-carried bfloat phi: %r is the value loaded before the loop on entry and
+; the previous iteration's load afterwards. The checks expect plain halfword
+; loads (ldrh) and the value to be kept live across the call to test_dummy
+; (r5/r6 in the NOFP sequence, s16/s18 in the FP sequence).
+define bfloat @test_phi(ptr %p1) {
+; CHECK-NOFP-LABEL: test_phi:
+; CHECK-NOFP: @ %bb.0: @ %entry
+; CHECK-NOFP-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NOFP-NEXT: push {r4, r5, r6, lr}
+; CHECK-NOFP-NEXT: ldrh r6, [r0]
+; CHECK-NOFP-NEXT: mov r4, r0
+; CHECK-NOFP-NEXT: .LBB32_1: @ %loop
+; CHECK-NOFP-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NOFP-NEXT: mov r0, r4
+; CHECK-NOFP-NEXT: mov r5, r6
+; CHECK-NOFP-NEXT: ldrh r6, [r4]
+; CHECK-NOFP-NEXT: bl test_dummy
+; CHECK-NOFP-NEXT: lsls r0, r0, #31
+; CHECK-NOFP-NEXT: bne .LBB32_1
+; CHECK-NOFP-NEXT: @ %bb.2: @ %return
+; CHECK-NOFP-NEXT: mov r0, r5
+; CHECK-NOFP-NEXT: pop {r4, r5, r6, pc}
+;
+; CHECK-FP-LABEL: test_phi:
+; CHECK-FP: @ %bb.0: @ %entry
+; CHECK-FP-NEXT: .save {r4, lr}
+; CHECK-FP-NEXT: push {r4, lr}
+; CHECK-FP-NEXT: .vsave {d8, d9}
+; CHECK-FP-NEXT: vpush {d8, d9}
+; CHECK-FP-NEXT: mov r4, r0
+; CHECK-FP-NEXT: ldrh r0, [r0]
+; CHECK-FP-NEXT: vmov s18, r0
+; CHECK-FP-NEXT: .LBB32_1: @ %loop
+; CHECK-FP-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-FP-NEXT: ldrh r0, [r4]
+; CHECK-FP-NEXT: vmov.f32 s16, s18
+; CHECK-FP-NEXT: vmov s18, r0
+; CHECK-FP-NEXT: mov r0, r4
+; CHECK-FP-NEXT: bl test_dummy
+; CHECK-FP-NEXT: lsls r0, r0, #31
+; CHECK-FP-NEXT: bne .LBB32_1
+; CHECK-FP-NEXT: @ %bb.2: @ %return
+; CHECK-FP-NEXT: vmov.f32 s0, s16
+; CHECK-FP-NEXT: vpop {d8, d9}
+; CHECK-FP-NEXT: pop {r4, pc}
+entry:
+ %a = load bfloat, ptr %p1
+ br label %loop
+loop:
+ %r = phi bfloat [%a, %entry], [%b, %loop]
+ %b = load bfloat, ptr %p1
+ %c = call i1 @test_dummy(ptr %p1)
+ br i1 %c, label %loop, label %return
+return:
+ ret bfloat %r
+}
+
+; External callee whose i1 result is the loop condition in test_phi.
+declare i1 @test_dummy(ptr %p1) #0
+
+; fptosi bfloat -> i32. The operand is widened with a 16-bit left shift; the
+; NOFP checks then expect a call to __aeabi_f2iz, the FP checks vcvt.s32.f32.
+define i32 @test_fptosi_i32(bfloat %a) {
+; CHECK-NOFP-LABEL: test_fptosi_i32:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl __aeabi_f2iz
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fptosi_i32:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: vcvt.s32.f32 s0, s0
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: bx lr
+ %r = fptosi bfloat %a to i32
+ ret i32 %r
+}
+
+; fptosi bfloat -> i64. Both check sequences widen the operand with a 16-bit
+; left shift and call __aeabi_f2lz.
+define i64 @test_fptosi_i64(bfloat %a) {
+; CHECK-NOFP-LABEL: test_fptosi_i64:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl __aeabi_f2lz
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fptosi_i64:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: bl __aeabi_f2lz
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = fptosi bfloat %a to i64
+ ret i64 %r
+}
+
+; fptoui bfloat -> i32. The operand is widened with a 16-bit left shift; the
+; NOFP checks then expect a call to __aeabi_f2uiz, the FP checks vcvt.u32.f32.
+define i32 @test_fptoui_i32(bfloat %a) {
+; CHECK-NOFP-LABEL: test_fptoui_i32:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl __aeabi_f2uiz
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fptoui_i32:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: vcvt.u32.f32 s0, s0
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: bx lr
+ %r = fptoui bfloat %a to i32
+ ret i32 %r
+}
+
+; fptoui bfloat -> i64. Both check sequences widen the operand with a 16-bit
+; left shift and call __aeabi_f2ulz.
+define i64 @test_fptoui_i64(bfloat %a) {
+; CHECK-NOFP-LABEL: test_fptoui_i64:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl __aeabi_f2ulz
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fptoui_i64:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: bl __aeabi_f2ulz
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = fptoui bfloat %a to i64
+ ret i64 %r
+}
+
+; uitofp i32 -> bfloat. The integer is first converted to f32 (__aeabi_ui2f in
+; the NOFP checks, vcvt.f32.u32 in the FP checks) and then narrowed with a
+; call to __truncsfbf2.
+define bfloat @test_uitofp_i32(i32 %a) {
+;
+; CHECK-NOFP-LABEL: test_uitofp_i32:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: bl __aeabi_ui2f
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_uitofp_i32:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: vcvt.f32.u32 s0, s0
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = uitofp i32 %a to bfloat
+ ret bfloat %r
+}
+
+; uitofp i64 -> bfloat. Both check sequences call __aeabi_ul2f to produce an
+; f32 and then __truncsfbf2 to narrow it.
+define bfloat @test_uitofp_i64(i64 %a) {
+;
+; CHECK-NOFP-LABEL: test_uitofp_i64:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: bl __aeabi_ul2f
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_uitofp_i64:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: bl __aeabi_ul2f
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = uitofp i64 %a to bfloat
+ ret bfloat %r
+}
+
+; sitofp i32 -> bfloat. The integer is first converted to f32 (__aeabi_i2f in
+; the NOFP checks, vcvt.f32.s32 in the FP checks) and then narrowed with a
+; call to __truncsfbf2.
+define bfloat @test_sitofp_i32(i32 %a) {
+;
+; CHECK-NOFP-LABEL: test_sitofp_i32:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: bl __aeabi_i2f
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_sitofp_i32:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: vcvt.f32.s32 s0, s0
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = sitofp i32 %a to bfloat
+ ret bfloat %r
+}
+
+; sitofp i64 -> bfloat. Both check sequences call __aeabi_l2f to produce an
+; f32 and then __truncsfbf2 to narrow it.
+define bfloat @test_sitofp_i64(i64 %a) {
+;
+; CHECK-NOFP-LABEL: test_sitofp_i64:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: bl __aeabi_l2f
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_sitofp_i64:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: bl __aeabi_l2f
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = sitofp i64 %a to bfloat
+ ret bfloat %r
+}
+
+; uitofp i32 -> bfloat feeding a bfloat fadd. The checks expect two calls to
+; __truncsfbf2: one after the integer conversion and one after the f32 add,
+; with both add operands widened again by a 16-bit left shift in between.
+define bfloat @test_uitofp_i32_fadd(i32 %a, bfloat %b) {
+;
+; CHECK-NOFP-LABEL: test_uitofp_i32_fadd:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r4, lr}
+; CHECK-NOFP-NEXT: push {r4, lr}
+; CHECK-NOFP-NEXT: mov r4, r1
+; CHECK-NOFP-NEXT: bl __aeabi_ui2f
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: lsls r2, r4, #16
+; CHECK-NOFP-NEXT: lsls r1, r0, #16
+; CHECK-NOFP-NEXT: mov r0, r2
+; CHECK-NOFP-NEXT: bl __aeabi_fadd
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r4, pc}
+;
+; CHECK-FP-LABEL: test_uitofp_i32_fadd:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r4, lr}
+; CHECK-FP-NEXT: push {r4, lr}
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vmov r4, s0
+; CHECK-FP-NEXT: vcvt.f32.u32 s2, s2
+; CHECK-FP-NEXT: vmov.f32 s0, s2
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r1, r4, #16
+; CHECK-FP-NEXT: vmov s0, r1
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vadd.f32 s0, s0, s2
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r4, pc}
+ %c = uitofp i32 %a to bfloat
+ %r = fadd bfloat %b, %c
+ ret bfloat %r
+}
+
+; sitofp i32 -> bfloat feeding a bfloat fadd. The checks expect two calls to
+; __truncsfbf2: one after the integer conversion and one after the f32 add,
+; with both add operands widened again by a 16-bit left shift in between.
+define bfloat @test_sitofp_i32_fadd(i32 %a, bfloat %b) {
+;
+; CHECK-NOFP-LABEL: test_sitofp_i32_fadd:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r4, lr}
+; CHECK-NOFP-NEXT: push {r4, lr}
+; CHECK-NOFP-NEXT: mov r4, r1
+; CHECK-NOFP-NEXT: bl __aeabi_i2f
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: lsls r2, r4, #16
+; CHECK-NOFP-NEXT: lsls r1, r0, #16
+; CHECK-NOFP-NEXT: mov r0, r2
+; CHECK-NOFP-NEXT: bl __aeabi_fadd
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r4, pc}
+;
+; CHECK-FP-LABEL: test_sitofp_i32_fadd:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r4, lr}
+; CHECK-FP-NEXT: push {r4, lr}
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vmov r4, s0
+; CHECK-FP-NEXT: vcvt.f32.s32 s2, s2
+; CHECK-FP-NEXT: vmov.f32 s0, s2
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r1, r4, #16
+; CHECK-FP-NEXT: vmov s0, r1
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vadd.f32 s0, s0, s2
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r4, pc}
+ %c = sitofp i32 %a to bfloat
+ %r = fadd bfloat %b, %c
+ ret bfloat %r
+}
+
+; fptrunc float -> bfloat. Both check sequences expect a call to __truncsfbf2.
+define bfloat @test_fptrunc_float(float %a) {
+;
+; CHECK-NOFP-LABEL: test_fptrunc_float:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fptrunc_float:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = fptrunc float %a to bfloat
+ ret bfloat %r
+}
+
+; fptrunc double -> bfloat. The NOFP checks expect a single call to
+; __truncdfbf2. The FP checks instead expect __aeabi_d2f followed by a 16-bit
+; right shift of the f32 bits.
+; NOTE(review): the FP sequence narrows in two steps and drops the low 16 bits
+; with a shift rather than calling __truncdfbf2 - confirm this rounding
+; behaviour is intended.
+define bfloat @test_fptrunc_double(double %a) {
+;
+; CHECK-NOFP-LABEL: test_fptrunc_double:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: bl __truncdfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fptrunc_double:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, r1, d0
+; CHECK-FP-NEXT: bl __aeabi_d2f
+; CHECK-FP-NEXT: lsrs r0, r0, #16
+; CHECK-FP-NEXT: vmov.f16 s0, r0
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = fptrunc double %a to bfloat
+ ret bfloat %r
+}
+
+; fpext bfloat -> float. Both check sequences expect only a 16-bit left shift
+; of the bfloat bits (plus register moves in the FP sequence); no libcall.
+define float @test_fpext_float(bfloat %a) {
+; CHECK-NOFP-LABEL: test_fpext_float:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bx lr
+;
+; CHECK-FP-LABEL: test_fpext_float:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bx lr
+ %r = fpext bfloat %a to float
+ ret float %r
+}
+
+; fpext bfloat -> double. Both check sequences widen the operand with a 16-bit
+; left shift and then call __aeabi_f2d.
+define double @test_fpext_double(bfloat %a) {
+; CHECK-NOFP-LABEL: test_fpext_double:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl __aeabi_f2d
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fpext_double:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: bl __aeabi_f2d
+; CHECK-FP-NEXT: vmov d0, r0, r1
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = fpext bfloat %a to double
+ ret double %r
+}
+
+; bitcast bfloat -> i16. The NOFP checks expect no instruction besides the
+; return; the FP checks expect a single vmov from s0 to r0.
+define i16 @test_bitcast_bfloattoi16(bfloat %a) {
+; CHECK-NOFP-LABEL: test_bitcast_bfloattoi16:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: bx lr
+;
+; CHECK-FP-LABEL: test_bitcast_bfloattoi16:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: bx lr
+ %r = bitcast bfloat %a to i16
+ ret i16 %r
+}
+
+; bitcast i16 -> bfloat. The NOFP checks expect no instruction besides the
+; return; the FP checks expect a single vmov from r0 to s0.
+define bfloat @test_bitcast_i16tobfloat(i16 %a) {
+; CHECK-NOFP-LABEL: test_bitcast_i16tobfloat:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: bx lr
+;
+; CHECK-FP-LABEL: test_bitcast_i16tobfloat:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bx lr
+ %r = bitcast i16 %a to bfloat
+ ret bfloat %r
+}
+
+; Declarations of the math intrinsics called by the tests. Every declaration
+; takes and returns bfloat even though the name suffix is `.f16`.
+; NOTE(review): the suffix matching a bfloat overload would be `.bf16`;
+; presumably the IR reader remangles these names - confirm before renaming.
+; Attribute group #0 is not defined within this part of the file.
+declare bfloat @llvm.sqrt.f16(bfloat %a) #0
+declare bfloat @llvm.powi.f16.i32(bfloat %a, i32 %b) #0
+declare bfloat @llvm.sin.f16(bfloat %a) #0
+declare bfloat @llvm.cos.f16(bfloat %a) #0
+declare bfloat @llvm.tan.f16(bfloat %a) #0
+declare bfloat @llvm.asin.f16(bfloat %a) #0
+declare bfloat @llvm.acos.f16(bfloat %a) #0
+declare bfloat @llvm.atan.f16(bfloat %a) #0
+declare bfloat @llvm.atan2.f16(bfloat %a, bfloat %b) #0
+declare bfloat @llvm.sinh.f16(bfloat %a) #0
+declare bfloat @llvm.cosh.f16(bfloat %a) #0
+declare bfloat @llvm.tanh.f16(bfloat %a) #0
+declare bfloat @llvm.pow.f16(bfloat %a, bfloat %b) #0
+declare bfloat @llvm.exp.f16(bfloat %a) #0
+declare bfloat @llvm.exp2.f16(bfloat %a) #0
+declare bfloat @llvm.log.f16(bfloat %a) #0
+declare bfloat @llvm.log10.f16(bfloat %a) #0
+declare bfloat @llvm.log2.f16(bfloat %a) #0
+declare bfloat @llvm.fma.f16(bfloat %a, bfloat %b, bfloat %c) #0
+declare bfloat @llvm.fabs.f16(bfloat %a) #0
+declare bfloat @llvm.minnum.f16(bfloat %a, bfloat %b) #0
+declare bfloat @llvm.maxnum.f16(bfloat %a, bfloat %b) #0
+declare bfloat @llvm.copysign.f16(bfloat %a, bfloat %b) #0
+declare bfloat @llvm.floor.f16(bfloat %a) #0
+declare bfloat @llvm.ceil.f16(bfloat %a) #0
+declare bfloat @llvm.trunc.f16(bfloat %a) #0
+declare bfloat @llvm.rint.f16(bfloat %a) #0
+declare bfloat @llvm.nearbyint.f16(bfloat %a) #0
+declare bfloat @llvm.round.f16(bfloat %a) #0
+declare bfloat @llvm.roundeven.f16(bfloat %a) #0
+declare bfloat @llvm.fmuladd.f16(bfloat %a, bfloat %b, bfloat %c) #0
+
+
+; llvm.sqrt on bfloat. The operand is widened with a 16-bit left shift; the
+; NOFP checks call sqrtf, the FP checks use vsqrt.f32, and both narrow the
+; result with __truncsfbf2.
+define bfloat @test_sqrt(bfloat %a) {
+;
+; CHECK-NOFP-LABEL: test_sqrt:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl sqrtf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_sqrt:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: vsqrt.f32 s0, s0
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.sqrt.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.powi on bfloat with an i32 exponent. Both check sequences widen the
+; bfloat operand with a 16-bit left shift, call __powisf2, and narrow the
+; result with __truncsfbf2.
+define bfloat @test_powi(bfloat %a, i32 %b) {
+;
+; CHECK-NOFP-LABEL: test_powi:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl __powisf2
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_powi:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r1, r1, #16
+; CHECK-FP-NEXT: vmov s0, r1
+; CHECK-FP-NEXT: bl __powisf2
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.powi.f16.i32(bfloat %a, i32 %b)
+ ret bfloat %r
+}
+
+
+; llvm.sin on bfloat. Both check sequences widen the operand with a 16-bit
+; left shift, call sinf, and narrow the result with __truncsfbf2.
+define bfloat @test_sin(bfloat %a) {
+;
+; CHECK-NOFP-LABEL: test_sin:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl sinf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_sin:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl sinf
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.sin.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.cos on bfloat. Both check sequences widen the operand with a 16-bit
+; left shift, call cosf, and narrow the result with __truncsfbf2.
+define bfloat @test_cos(bfloat %a) {
+;
+; CHECK-NOFP-LABEL: test_cos:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl cosf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_cos:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl cosf
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.cos.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.tan on bfloat. Both check sequences widen the operand with a 16-bit
+; left shift, call tanf, and narrow the result with __truncsfbf2.
+define bfloat @test_tan(bfloat %a) {
+;
+; CHECK-NOFP-LABEL: test_tan:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl tanf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_tan:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl tanf
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.tan.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.acos on bfloat. Both check sequences widen the operand with a 16-bit
+; left shift, call acosf, and narrow the result with __truncsfbf2.
+define bfloat @test_acos(bfloat %a) {
+;
+; CHECK-NOFP-LABEL: test_acos:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl acosf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_acos:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl acosf
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.acos.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.asin on bfloat. Both check sequences widen the operand with a 16-bit
+; left shift, call asinf, and narrow the result with __truncsfbf2.
+define bfloat @test_asin(bfloat %a) {
+;
+; CHECK-NOFP-LABEL: test_asin:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl asinf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_asin:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl asinf
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.asin.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.atan on bfloat. Both check sequences widen the operand with a 16-bit
+; left shift, call atanf, and narrow the result with __truncsfbf2.
+define bfloat @test_atan(bfloat %a) {
+;
+; CHECK-NOFP-LABEL: test_atan:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl atanf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_atan:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl atanf
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.atan.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.atan2 on two bfloats. Both check sequences widen each operand with a
+; 16-bit left shift, call atan2f, and narrow the result with __truncsfbf2.
+define bfloat @test_atan2(bfloat %a, bfloat %b) {
+;
+; CHECK-NOFP-LABEL: test_atan2:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl atan2f
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_atan2:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: lsls r1, r1, #16
+; CHECK-FP-NEXT: vmov s1, r0
+; CHECK-FP-NEXT: vmov s0, r1
+; CHECK-FP-NEXT: bl atan2f
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.atan2.f16(bfloat %a, bfloat %b)
+ ret bfloat %r
+}
+
+; llvm.cosh on bfloat. Both check sequences widen the operand with a 16-bit
+; left shift, call coshf, and narrow the result with __truncsfbf2.
+define bfloat @test_cosh(bfloat %a) {
+;
+; CHECK-NOFP-LABEL: test_cosh:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl coshf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_cosh:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl coshf
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.cosh.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.sinh on bfloat. Both check sequences widen the operand with a 16-bit
+; left shift, call sinhf, and narrow the result with __truncsfbf2.
+define bfloat @test_sinh(bfloat %a) {
+;
+; CHECK-NOFP-LABEL: test_sinh:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl sinhf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_sinh:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl sinhf
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.sinh.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.tanh on bfloat. Both check sequences widen the operand with a 16-bit
+; left shift, call tanhf, and narrow the result with __truncsfbf2.
+define bfloat @test_tanh(bfloat %a) {
+;
+; CHECK-NOFP-LABEL: test_tanh:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl tanhf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_tanh:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl tanhf
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.tanh.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.pow on two bfloats. Both check sequences widen each operand with a
+; 16-bit left shift, call powf, and narrow the result with __truncsfbf2.
+define bfloat @test_pow(bfloat %a, bfloat %b) {
+;
+; CHECK-NOFP-LABEL: test_pow:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl powf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_pow:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: lsls r1, r1, #16
+; CHECK-FP-NEXT: vmov s1, r0
+; CHECK-FP-NEXT: vmov s0, r1
+; CHECK-FP-NEXT: bl powf
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.pow.f16(bfloat %a, bfloat %b)
+ ret bfloat %r
+}
+
+; llvm.exp on bfloat. Both check sequences widen the operand with a 16-bit
+; left shift, call expf, and narrow the result with __truncsfbf2.
+define bfloat @test_exp(bfloat %a) {
+;
+; CHECK-NOFP-LABEL: test_exp:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl expf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_exp:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl expf
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.exp.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.exp2 on bfloat. Both check sequences widen the operand with a 16-bit
+; left shift, call exp2f, and narrow the result with __truncsfbf2.
+define bfloat @test_exp2(bfloat %a) {
+;
+; CHECK-NOFP-LABEL: test_exp2:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl exp2f
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_exp2:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl exp2f
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.exp2.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.log on bfloat. Both check sequences widen the operand with a 16-bit
+; left shift, call logf, and narrow the result with __truncsfbf2.
+define bfloat @test_log(bfloat %a) {
+;
+; CHECK-NOFP-LABEL: test_log:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl logf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_log:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl logf
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.log.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.log10 on bfloat. Both check sequences widen the operand with a 16-bit
+; left shift, call log10f, and narrow the result with __truncsfbf2.
+define bfloat @test_log10(bfloat %a) {
+;
+; CHECK-NOFP-LABEL: test_log10:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl log10f
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_log10:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl log10f
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.log10.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.log2 on bfloat. Both check sequences widen the operand with a 16-bit
+; left shift, call log2f, and narrow the result with __truncsfbf2.
+define bfloat @test_log2(bfloat %a) {
+;
+; CHECK-NOFP-LABEL: test_log2:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl log2f
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_log2:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bl log2f
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.log2.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.fma on three bfloats. All operands are widened with a 16-bit left
+; shift; the NOFP checks call fmaf, the FP checks use vfma.f32, and both
+; narrow the result with __truncsfbf2.
+define bfloat @test_fma(bfloat %a, bfloat %b, bfloat %c) {
+;
+; CHECK-NOFP-LABEL: test_fma:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: lsls r2, r2, #16
+; CHECK-NOFP-NEXT: bl fmaf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_fma:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: vmov r1, s1
+; CHECK-FP-NEXT: vmov r2, s2
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: lsls r1, r1, #16
+; CHECK-FP-NEXT: vmov s4, r0
+; CHECK-FP-NEXT: lsls r0, r2, #16
+; CHECK-FP-NEXT: vmov s2, r1
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: vfma.f32 s0, s4, s2
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.fma.f16(bfloat %a, bfloat %b, bfloat %c)
+ ret bfloat %r
+}
+
+; llvm.fabs on bfloat. Both check sequences expect a single integer
+; `bfc r0, #15, #17` (clearing bit 15 and everything above it) with no
+; widening and no libcall.
+define bfloat @test_fabs(bfloat %a) {
+; CHECK-NOFP-LABEL: test_fabs:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: bfc r0, #15, #17
+; CHECK-NOFP-NEXT: bx lr
+;
+; CHECK-FP-LABEL: test_fabs:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: bfc r0, #15, #17
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bx lr
+ %r = call bfloat @llvm.fabs.f16(bfloat %a)
+ ret bfloat %r
+}
+
+; llvm.minnum on two bfloats. Both operands are widened with a 16-bit left
+; shift; the NOFP checks call fminf, the FP checks use vminnm.f32, and both
+; narrow the result with __truncsfbf2.
+define bfloat @test_minnum(bfloat %a, bfloat %b) {
+;
+; CHECK-NOFP-LABEL: test_minnum:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl fminf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_minnum:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vminnm.f32 s0, s2, s0
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.minnum.f16(bfloat %a, bfloat %b)
+ ret bfloat %r
+}
+
+; llvm.maxnum on two bfloats. Both operands are widened with a 16-bit left
+; shift; the NOFP checks call fmaxf, the FP checks use vmaxnm.f32, and both
+; narrow the result with __truncsfbf2.
+define bfloat @test_maxnum(bfloat %a, bfloat %b) {
+;
+; CHECK-NOFP-LABEL: test_maxnum:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: lsls r1, r1, #16
+; CHECK-NOFP-NEXT: bl fmaxf
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_maxnum:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: vmov r0, s1
+; CHECK-FP-NEXT: vmov r1, s0
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: lsls r0, r1, #16
+; CHECK-FP-NEXT: vmov s2, r0
+; CHECK-FP-NEXT: vmaxnm.f32 s0, s2, s0
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.maxnum.f16(bfloat %a, bfloat %b)
+ ret bfloat %r
+}
+
+; llvm.copysign on two bfloats. Both check sequences expect integer bit
+; operations only (lsrs + bfi for NOFP; and #32768, bfc and add for FP), with
+; no widening and no libcall.
+define bfloat @test_copysign(bfloat %a, bfloat %b) {
+;
+; CHECK-NOFP-LABEL: test_copysign:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: lsrs r1, r1, #15
+; CHECK-NOFP-NEXT: bfi r0, r1, #15, #17
+; CHECK-NOFP-NEXT: bx lr
+;
+; CHECK-FP-LABEL: test_copysign:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: vmov r1, s1
+; CHECK-FP-NEXT: and r1, r1, #32768
+; CHECK-FP-NEXT: bfc r0, #15, #17
+; CHECK-FP-NEXT: add r0, r1
+; CHECK-FP-NEXT: vmov.f16 s0, r0
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bx lr
+ %r = call bfloat @llvm.copysign.f16(bfloat %a, bfloat %b)
+ ret bfloat %r
+}
+
+; llvm.copysign where the sign operand is a float truncated to bfloat. The
+; checks expect no __truncsfbf2 call: the sign is masked straight out of the
+; float's bits (and #-2147483648), shifted right by 16 and OR-ed into the
+; bfloat whose bit 15 and above were cleared with bfc.
+define bfloat @test_copysign_f32(bfloat %a, float %b) {
+;
+; CHECK-NOFP-LABEL: test_copysign_f32:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: and r1, r1, #-2147483648
+; CHECK-NOFP-NEXT: bfc r0, #15, #17
+; CHECK-NOFP-NEXT: orr.w r0, r0, r1, lsr #16
+; CHECK-NOFP-NEXT: bx lr
+;
+; CHECK-FP-LABEL: test_copysign_f32:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: vmov r1, s1
+; CHECK-FP-NEXT: and r1, r1, #-2147483648
+; CHECK-FP-NEXT: bfc r0, #15, #17
+; CHECK-FP-NEXT: orr.w r0, r0, r1, lsr #16
+; CHECK-FP-NEXT: vmov.f16 s0, r0
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bx lr
+ %tb = fptrunc float %b to bfloat
+ %r = call bfloat @llvm.copysign.f16(bfloat %a, bfloat %tb)
+ ret bfloat %r
+}
+
+; llvm.copysign where the sign operand is a double truncated to bfloat. The
+; checks expect no truncation libcall: the sign is masked out of one 32-bit
+; half of the double (r3 for NOFP; r2 after `vmov r1, r2, d1` for FP), shifted
+; right by 16 and OR-ed into the bfloat whose bit 15 and above were cleared.
+define bfloat @test_copysign_f64(bfloat %a, double %b) {
+;
+; CHECK-NOFP-LABEL: test_copysign_f64:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: and r1, r3, #-2147483648
+; CHECK-NOFP-NEXT: bfc r0, #15, #17
+; CHECK-NOFP-NEXT: orr.w r0, r0, r1, lsr #16
+; CHECK-NOFP-NEXT: bx lr
+;
+; CHECK-FP-LABEL: test_copysign_f64:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: vmov r1, r2, d1
+; CHECK-FP-NEXT: and r1, r2, #-2147483648
+; CHECK-FP-NEXT: bfc r0, #15, #17
+; CHECK-FP-NEXT: orr.w r0, r0, r1, lsr #16
+; CHECK-FP-NEXT: vmov.f16 s0, r0
+; CHECK-FP-NEXT: vmov.f16 r0, s0
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bx lr
+ %tb = fptrunc double %b to bfloat
+ %r = call bfloat @llvm.copysign.f16(bfloat %a, bfloat %tb)
+ ret bfloat %r
+}
+
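+; The bfloat copysign result is immediately fpext'ed to float; the expected
+; code has no __truncsfbf2 round trip, i.e. codegen folds
+; away the (fpext (fp_round <result>)) here.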
+
+define float @test_copysign_extended(bfloat %a, bfloat %b) {
+; bfloat copysign whose result is fpext'ed to float: both runs expect the integer sign merge followed by a single lsls #16, with no libcall.
; CHECK-NOFP-LABEL: test_copysign_extended:
; CHECK-NOFP: @ %bb.0:
; CHECK-NOFP-NEXT: lsrs r1, r1, #15
; CHECK-NOFP-NEXT: bfi r0, r1, #15, #17
; CHECK-NOFP-NEXT: lsls r0, r0, #16
; CHECK-NOFP-NEXT: bx lr
;
; CHECK-FP-LABEL: test_copysign_extended:
; CHECK-FP: @ %bb.0:
; CHECK-FP-NEXT: vmov r0, s0
; CHECK-FP-NEXT: vmov r1, s1
; CHECK-FP-NEXT: and r1, r1, #32768
; CHECK-FP-NEXT: bfc r0, #15, #17
; CHECK-FP-NEXT: add r0, r1
; CHECK-FP-NEXT: lsls r0, r0, #16
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: bx lr
+ %r = call bfloat @llvm.copysign.f16(bfloat %a, bfloat %b)
+ %xr = fpext bfloat %r to float
+ ret float %xr
+}
+
+define bfloat @test_floor(bfloat %a) {
+; floor on bfloat: input is widened with lsls #16, rounded as f32 (bl floorf in the NOFP run, vrintm.f32 in the FP run), then narrowed via __truncsfbf2.
; CHECK-NOFP-LABEL: test_floor:
; CHECK-NOFP: @ %bb.0:
; CHECK-NOFP-NEXT: .save {r7, lr}
; CHECK-NOFP-NEXT: push {r7, lr}
; CHECK-NOFP-NEXT: lsls r0, r0, #16
; CHECK-NOFP-NEXT: bl floorf
; CHECK-NOFP-NEXT: bl __truncsfbf2
; CHECK-NOFP-NEXT: pop {r7, pc}
;
; CHECK-FP-LABEL: test_floor:
; CHECK-FP: @ %bb.0:
; CHECK-FP-NEXT: .save {r7, lr}
; CHECK-FP-NEXT: push {r7, lr}
; CHECK-FP-NEXT: vmov r0, s0
; CHECK-FP-NEXT: lsls r0, r0, #16
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: vrintm.f32 s0, s0
; CHECK-FP-NEXT: bl __truncsfbf2
; CHECK-FP-NEXT: vmov.f16 r0, s0
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.floor.f16(bfloat %a)
+ ret bfloat %r
+}
+
+define bfloat @test_ceil(bfloat %a) {
+; ceil on bfloat: input is widened with lsls #16, rounded as f32 (bl ceilf in the NOFP run, vrintp.f32 in the FP run), then narrowed via __truncsfbf2.
; CHECK-NOFP-LABEL: test_ceil:
; CHECK-NOFP: @ %bb.0:
; CHECK-NOFP-NEXT: .save {r7, lr}
; CHECK-NOFP-NEXT: push {r7, lr}
; CHECK-NOFP-NEXT: lsls r0, r0, #16
; CHECK-NOFP-NEXT: bl ceilf
; CHECK-NOFP-NEXT: bl __truncsfbf2
; CHECK-NOFP-NEXT: pop {r7, pc}
;
; CHECK-FP-LABEL: test_ceil:
; CHECK-FP: @ %bb.0:
; CHECK-FP-NEXT: .save {r7, lr}
; CHECK-FP-NEXT: push {r7, lr}
; CHECK-FP-NEXT: vmov r0, s0
; CHECK-FP-NEXT: lsls r0, r0, #16
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: vrintp.f32 s0, s0
; CHECK-FP-NEXT: bl __truncsfbf2
; CHECK-FP-NEXT: vmov.f16 r0, s0
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.ceil.f16(bfloat %a)
+ ret bfloat %r
+}
+
+define bfloat @test_trunc(bfloat %a) {
+; trunc on bfloat: input is widened with lsls #16, rounded as f32 (bl truncf in the NOFP run, vrintz.f32 in the FP run), then narrowed via __truncsfbf2.
; CHECK-NOFP-LABEL: test_trunc:
; CHECK-NOFP: @ %bb.0:
; CHECK-NOFP-NEXT: .save {r7, lr}
; CHECK-NOFP-NEXT: push {r7, lr}
; CHECK-NOFP-NEXT: lsls r0, r0, #16
; CHECK-NOFP-NEXT: bl truncf
; CHECK-NOFP-NEXT: bl __truncsfbf2
; CHECK-NOFP-NEXT: pop {r7, pc}
;
; CHECK-FP-LABEL: test_trunc:
; CHECK-FP: @ %bb.0:
; CHECK-FP-NEXT: .save {r7, lr}
; CHECK-FP-NEXT: push {r7, lr}
; CHECK-FP-NEXT: vmov r0, s0
; CHECK-FP-NEXT: lsls r0, r0, #16
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: vrintz.f32 s0, s0
; CHECK-FP-NEXT: bl __truncsfbf2
; CHECK-FP-NEXT: vmov.f16 r0, s0
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.trunc.f16(bfloat %a)
+ ret bfloat %r
+}
+
+define bfloat @test_rint(bfloat %a) {
+; rint on bfloat: input is widened with lsls #16, rounded as f32 (bl rintf in the NOFP run, vrintx.f32 in the FP run), then narrowed via __truncsfbf2.
; CHECK-NOFP-LABEL: test_rint:
; CHECK-NOFP: @ %bb.0:
; CHECK-NOFP-NEXT: .save {r7, lr}
; CHECK-NOFP-NEXT: push {r7, lr}
; CHECK-NOFP-NEXT: lsls r0, r0, #16
; CHECK-NOFP-NEXT: bl rintf
; CHECK-NOFP-NEXT: bl __truncsfbf2
; CHECK-NOFP-NEXT: pop {r7, pc}
;
; CHECK-FP-LABEL: test_rint:
; CHECK-FP: @ %bb.0:
; CHECK-FP-NEXT: .save {r7, lr}
; CHECK-FP-NEXT: push {r7, lr}
; CHECK-FP-NEXT: vmov r0, s0
; CHECK-FP-NEXT: lsls r0, r0, #16
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: vrintx.f32 s0, s0
; CHECK-FP-NEXT: bl __truncsfbf2
; CHECK-FP-NEXT: vmov.f16 r0, s0
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.rint.f16(bfloat %a)
+ ret bfloat %r
+}
+
+define bfloat @test_nearbyint(bfloat %a) {
+; nearbyint on bfloat: input is widened with lsls #16, rounded as f32 (bl nearbyintf in the NOFP run, vrintr.f32 in the FP run), then narrowed via __truncsfbf2.
; CHECK-NOFP-LABEL: test_nearbyint:
; CHECK-NOFP: @ %bb.0:
; CHECK-NOFP-NEXT: .save {r7, lr}
; CHECK-NOFP-NEXT: push {r7, lr}
; CHECK-NOFP-NEXT: lsls r0, r0, #16
; CHECK-NOFP-NEXT: bl nearbyintf
; CHECK-NOFP-NEXT: bl __truncsfbf2
; CHECK-NOFP-NEXT: pop {r7, pc}
;
; CHECK-FP-LABEL: test_nearbyint:
; CHECK-FP: @ %bb.0:
; CHECK-FP-NEXT: .save {r7, lr}
; CHECK-FP-NEXT: push {r7, lr}
; CHECK-FP-NEXT: vmov r0, s0
; CHECK-FP-NEXT: lsls r0, r0, #16
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: vrintr.f32 s0, s0
; CHECK-FP-NEXT: bl __truncsfbf2
; CHECK-FP-NEXT: vmov.f16 r0, s0
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.nearbyint.f16(bfloat %a)
+ ret bfloat %r
+}
+
+define bfloat @test_round(bfloat %a) {
+; round on bfloat: input is widened with lsls #16, rounded as f32 (bl roundf in the NOFP run, vrinta.f32 in the FP run), then narrowed via __truncsfbf2.
; CHECK-NOFP-LABEL: test_round:
; CHECK-NOFP: @ %bb.0:
; CHECK-NOFP-NEXT: .save {r7, lr}
; CHECK-NOFP-NEXT: push {r7, lr}
; CHECK-NOFP-NEXT: lsls r0, r0, #16
; CHECK-NOFP-NEXT: bl roundf
; CHECK-NOFP-NEXT: bl __truncsfbf2
; CHECK-NOFP-NEXT: pop {r7, pc}
;
; CHECK-FP-LABEL: test_round:
; CHECK-FP: @ %bb.0:
; CHECK-FP-NEXT: .save {r7, lr}
; CHECK-FP-NEXT: push {r7, lr}
; CHECK-FP-NEXT: vmov r0, s0
; CHECK-FP-NEXT: lsls r0, r0, #16
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: vrinta.f32 s0, s0
; CHECK-FP-NEXT: bl __truncsfbf2
; CHECK-FP-NEXT: vmov.f16 r0, s0
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.round.f16(bfloat %a)
+ ret bfloat %r
+}
+
+define bfloat @test_roundeven(bfloat %a) {
+; roundeven on bfloat: input is widened with lsls #16 and both runs expect bl roundevenf (no vrint instruction in the FP run), then narrowing via __truncsfbf2.
; CHECK-NOFP-LABEL: test_roundeven:
; CHECK-NOFP: @ %bb.0:
; CHECK-NOFP-NEXT: .save {r7, lr}
; CHECK-NOFP-NEXT: push {r7, lr}
; CHECK-NOFP-NEXT: lsls r0, r0, #16
; CHECK-NOFP-NEXT: bl roundevenf
; CHECK-NOFP-NEXT: bl __truncsfbf2
; CHECK-NOFP-NEXT: pop {r7, pc}
;
; CHECK-FP-LABEL: test_roundeven:
; CHECK-FP: @ %bb.0:
; CHECK-FP-NEXT: .save {r7, lr}
; CHECK-FP-NEXT: push {r7, lr}
; CHECK-FP-NEXT: vmov r0, s0
; CHECK-FP-NEXT: lsls r0, r0, #16
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: bl roundevenf
; CHECK-FP-NEXT: bl __truncsfbf2
; CHECK-FP-NEXT: vmov.f16 r0, s0
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.roundeven.f16(bfloat %a)
+ ret bfloat %r
+}
+
+define bfloat @test_fmuladd(bfloat %a, bfloat %b, bfloat %c) {
+; fmuladd on bfloat: both runs expect a separate f32 multiply and f32 add (__aeabi_fmul/__aeabi_fadd or vmul.f32/vadd.f32), each followed by a __truncsfbf2 call.
; CHECK-NOFP-LABEL: test_fmuladd:
; CHECK-NOFP: @ %bb.0:
; CHECK-NOFP-NEXT: .save {r4, lr}
; CHECK-NOFP-NEXT: push {r4, lr}
; CHECK-NOFP-NEXT: lsls r0, r0, #16
; CHECK-NOFP-NEXT: lsls r1, r1, #16
; CHECK-NOFP-NEXT: mov r4, r2
; CHECK-NOFP-NEXT: bl __aeabi_fmul
; CHECK-NOFP-NEXT: bl __truncsfbf2
; CHECK-NOFP-NEXT: lsls r0, r0, #16
; CHECK-NOFP-NEXT: lsls r1, r4, #16
; CHECK-NOFP-NEXT: bl __aeabi_fadd
; CHECK-NOFP-NEXT: bl __truncsfbf2
; CHECK-NOFP-NEXT: pop {r4, pc}
;
; CHECK-FP-LABEL: test_fmuladd:
; CHECK-FP: @ %bb.0:
; CHECK-FP-NEXT: .save {r7, lr}
; CHECK-FP-NEXT: push {r7, lr}
; CHECK-FP-NEXT: .vsave {d8}
; CHECK-FP-NEXT: vpush {d8}
; CHECK-FP-NEXT: vmov r0, s1
; CHECK-FP-NEXT: vmov r1, s0
; CHECK-FP-NEXT: vmov.f32 s16, s2
; CHECK-FP-NEXT: lsls r0, r0, #16
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: lsls r0, r1, #16
; CHECK-FP-NEXT: vmov s2, r0
; CHECK-FP-NEXT: vmul.f32 s0, s2, s0
; CHECK-FP-NEXT: bl __truncsfbf2
; CHECK-FP-NEXT: vmov r0, s16
; CHECK-FP-NEXT: vmov r1, s0
; CHECK-FP-NEXT: lsls r0, r0, #16
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: lsls r0, r1, #16
; CHECK-FP-NEXT: vmov s2, r0
; CHECK-FP-NEXT: vadd.f32 s0, s2, s0
; CHECK-FP-NEXT: bl __truncsfbf2
; CHECK-FP-NEXT: vmov.f16 r0, s0
; CHECK-FP-NEXT: vmov s0, r0
; CHECK-FP-NEXT: vpop {d8}
; CHECK-FP-NEXT: pop {r7, pc}
+ %r = call bfloat @llvm.fmuladd.f16(bfloat %a, bfloat %b, bfloat %c)
+ ret bfloat %r
+}
More information about the llvm-commits
mailing list