[llvm] 9ee4fe6 - [ARM] Fix Crashes in fp16/bf16 Inline Asm
Archibald Elliott via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 13 07:34:43 PDT 2023
Author: Archibald Elliott
Date: 2023-04-13T15:34:04+01:00
New Revision: 9ee4fe63bc0ad4c685760272e464c1591bfc16ea
URL: https://github.com/llvm/llvm-project/commit/9ee4fe63bc0ad4c685760272e464c1591bfc16ea
DIFF: https://github.com/llvm/llvm-project/commit/9ee4fe63bc0ad4c685760272e464c1591bfc16ea.diff
LOG: [ARM] Fix Crashes in fp16/bf16 Inline Asm
We were still seeing occasional crashes with inline assembly blocks
using fp16/bf16 after my previous patches:
- https://reviews.llvm.org/rGff4027d152d0
- https://reviews.llvm.org/rG7d15212b8c0c
- https://reviews.llvm.org/rG20b2d11896d9
It turns out:
- The original two commits were wrong: we should always have chosen the
  SPR register class, not the HPR register class, so that LLVM's
  SelectionDAGBuilder performs the correct splits/joins (a minimal IR
  sketch follows this list).
- The `splitValueIntoRegisterParts`/`joinRegisterPartsIntoValue` changes
from rG20b2d11896d9 are still correct, even though they sometimes
result in inefficient codegen of casts between fp16/bf16 and i32/f32
(which is visible in these tests).
This patch fixes crashes in `getCopyToParts` and when selecting
`(bf16 (bitconvert (fp16 ...)))` DAGs when Neon is enabled.
This patch also adds support for passing fp16/bf16 values using the
LLVM-specific 'x' constraint. This broadly matches how values are passed
with the 't' and 'w' constraints, but with a different set of valid S
registers.
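The new `half_x`/`bf16_x` tests cover this; the sketch below is lifted from
them and adds nothing beyond the patch. With 'x', the operand is restricted
to the low S registers (s0-s15, i.e. SPR_8), whereas 'w' and 't' allow the
full SPR class:

    define bfloat @bf16_x(bfloat %x) nounwind {
    entry:
      ; 'x' limits the allocation to s0-s15; with full bf16 support the
      ; value stays in an S register across the asm block.
      %0 = tail call bfloat asm "vmov $0, $1", "=x,x"(bfloat %x)
      ret bfloat %0
    }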
Differential Revision: https://reviews.llvm.org/D147715
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/ARM/inlineasm-fp-half.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index a6b92593c495..355128786c5b 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -20347,13 +20347,7 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
case 'w':
if (VT == MVT::Other)
break;
- if (VT == MVT::f16)
- return RCPair(0U, Subtarget->hasFullFP16() ? &ARM::HPRRegClass
- : &ARM::SPRRegClass);
- if (VT == MVT::bf16)
- return RCPair(0U, Subtarget->hasBF16() ? &ARM::HPRRegClass
- : &ARM::SPRRegClass);
- if (VT == MVT::f32)
+ if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
return RCPair(0U, &ARM::SPRRegClass);
if (VT.getSizeInBits() == 64)
return RCPair(0U, &ARM::DPRRegClass);
@@ -20363,7 +20357,7 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
case 'x':
if (VT == MVT::Other)
break;
- if (VT == MVT::f32)
+ if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
return RCPair(0U, &ARM::SPR_8RegClass);
if (VT.getSizeInBits() == 64)
return RCPair(0U, &ARM::DPR_8RegClass);
@@ -20373,13 +20367,7 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
case 't':
if (VT == MVT::Other)
break;
- if (VT == MVT::f16)
- return RCPair(0U, Subtarget->hasFullFP16() ? &ARM::HPRRegClass
- : &ARM::SPRRegClass);
- if (VT == MVT::bf16)
- return RCPair(0U, Subtarget->hasBF16() ? &ARM::HPRRegClass
- : &ARM::SPRRegClass);
- if (VT == MVT::f32 || VT == MVT::i32)
+ if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
return RCPair(0U, &ARM::SPRRegClass);
if (VT.getSizeInBits() == 64)
return RCPair(0U, &ARM::DPR_VFP2RegClass);
diff --git a/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll b/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll
index 9840e3f63c5a..554e5ba72c5d 100644
--- a/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll
+++ b/llvm/test/CodeGen/ARM/inlineasm-fp-half.ll
@@ -1,21 +1,34 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
; No FP16/BF16
-; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-SOFTFP
-; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-SOFTFP
-; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-HARD
-; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-HARD
+; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=NO-FP16-SOFTFP
+; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=NO-FP16-SOFTFP
+; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=NO-FP16-SOFTFP
+; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=NO-FP16-SOFTFP
+; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=NO-FP16-HARD
+; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=NO-FP16-HARD
+; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=NO-FP16-HARD
+; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,-fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=NO-FP16-HARD
; With FP16, Without BF16
-; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP
-; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP
-; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD
-; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD
+; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=FP16-SOFTFP
+; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=FP16-SOFTFP
+; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=FP16-SOFTFP
+; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=FP16-SOFTFP
+; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=FP16-HARD
+; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefix=FP16-HARD
+; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=FP16-HARD
+; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,-bf16,+neon %s -o - | FileCheck %s --check-prefix=FP16-HARD
; With FP16/BF16
-; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP,BF16-SOFTFP
-; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFTFP,BF16-SOFTFP
-; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD,BF16-HARD
-; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD,BF16-HARD
+; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefix=BF16-SOFTFP
+; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefix=BF16-SOFTFP
+; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,+neon %s -o - | FileCheck %s --check-prefix=SIMD-BF16-SOFTFP
+; RUN: llc -mtriple=thumb-none-eabi -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,+neon %s -o - | FileCheck %s --check-prefix=SIMD-BF16-SOFTFP
+; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefix=BF16-HARD
+; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefix=BF16-HARD
+; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,+neon %s -o - | FileCheck %s --check-prefix=SIMD-BF16-HARD
+; RUN: llc -mtriple=thumb-none-eabihf -mattr=+armv8-a,+fp-armv8,+fullfp16,+bf16,+neon %s -o - | FileCheck %s --check-prefix=SIMD-BF16-HARD
; This test ensures that we can use `w` and `t` constraints to allocate
; S-registers for 16-bit FP inputs and outputs for inline assembly, with either
@@ -41,6 +54,8 @@ define half @half_t(half %x) nounwind {
; FP16-SOFTFP-LABEL: half_t:
; FP16-SOFTFP: @ %bb.0: @ %entry
; FP16-SOFTFP-NEXT: vmov.f16 s0, r0
+; FP16-SOFTFP-NEXT: vmov.f16 r0, s0
+; FP16-SOFTFP-NEXT: vmov s0, r0
; FP16-SOFTFP-NEXT: @APP
; FP16-SOFTFP-NEXT: vmov.f32 s0, s0
; FP16-SOFTFP-NEXT: @NO_APP
@@ -49,10 +64,52 @@ define half @half_t(half %x) nounwind {
;
; FP16-HARD-LABEL: half_t:
; FP16-HARD: @ %bb.0: @ %entry
+; FP16-HARD-NEXT: vmov.f16 r0, s0
+; FP16-HARD-NEXT: vmov s0, r0
; FP16-HARD-NEXT: @APP
; FP16-HARD-NEXT: vmov.f32 s0, s0
; FP16-HARD-NEXT: @NO_APP
; FP16-HARD-NEXT: bx lr
+;
+; BF16-SOFTFP-LABEL: half_t:
+; BF16-SOFTFP: @ %bb.0: @ %entry
+; BF16-SOFTFP-NEXT: vmov.f16 s0, r0
+; BF16-SOFTFP-NEXT: vmov.f16 r0, s0
+; BF16-SOFTFP-NEXT: vmov s0, r0
+; BF16-SOFTFP-NEXT: @APP
+; BF16-SOFTFP-NEXT: vmov.f32 s0, s0
+; BF16-SOFTFP-NEXT: @NO_APP
+; BF16-SOFTFP-NEXT: vmov r0, s0
+; BF16-SOFTFP-NEXT: bx lr
+;
+; SIMD-BF16-SOFTFP-LABEL: half_t:
+; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry
+; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0
+; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0
+; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0
+; SIMD-BF16-SOFTFP-NEXT: @APP
+; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0
+; SIMD-BF16-SOFTFP-NEXT: @NO_APP
+; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0
+; SIMD-BF16-SOFTFP-NEXT: bx lr
+;
+; BF16-HARD-LABEL: half_t:
+; BF16-HARD: @ %bb.0: @ %entry
+; BF16-HARD-NEXT: vmov.f16 r0, s0
+; BF16-HARD-NEXT: vmov s0, r0
+; BF16-HARD-NEXT: @APP
+; BF16-HARD-NEXT: vmov.f32 s0, s0
+; BF16-HARD-NEXT: @NO_APP
+; BF16-HARD-NEXT: bx lr
+;
+; SIMD-BF16-HARD-LABEL: half_t:
+; SIMD-BF16-HARD: @ %bb.0: @ %entry
+; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0
+; SIMD-BF16-HARD-NEXT: vmov s0, r0
+; SIMD-BF16-HARD-NEXT: @APP
+; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0
+; SIMD-BF16-HARD-NEXT: @NO_APP
+; SIMD-BF16-HARD-NEXT: bx lr
entry:
%0 = tail call half asm "vmov $0, $1", "=t,t"(half %x)
ret half %0
@@ -78,6 +135,8 @@ define half @half_w(half %x) nounwind {
; FP16-SOFTFP-LABEL: half_w:
; FP16-SOFTFP: @ %bb.0: @ %entry
; FP16-SOFTFP-NEXT: vmov.f16 s0, r0
+; FP16-SOFTFP-NEXT: vmov.f16 r0, s0
+; FP16-SOFTFP-NEXT: vmov s0, r0
; FP16-SOFTFP-NEXT: @APP
; FP16-SOFTFP-NEXT: vmov.f32 s0, s0
; FP16-SOFTFP-NEXT: @NO_APP
@@ -86,15 +145,138 @@ define half @half_w(half %x) nounwind {
;
; FP16-HARD-LABEL: half_w:
; FP16-HARD: @ %bb.0: @ %entry
+; FP16-HARD-NEXT: vmov.f16 r0, s0
+; FP16-HARD-NEXT: vmov s0, r0
; FP16-HARD-NEXT: @APP
; FP16-HARD-NEXT: vmov.f32 s0, s0
; FP16-HARD-NEXT: @NO_APP
; FP16-HARD-NEXT: bx lr
+;
+; BF16-SOFTFP-LABEL: half_w:
+; BF16-SOFTFP: @ %bb.0: @ %entry
+; BF16-SOFTFP-NEXT: vmov.f16 s0, r0
+; BF16-SOFTFP-NEXT: vmov.f16 r0, s0
+; BF16-SOFTFP-NEXT: vmov s0, r0
+; BF16-SOFTFP-NEXT: @APP
+; BF16-SOFTFP-NEXT: vmov.f32 s0, s0
+; BF16-SOFTFP-NEXT: @NO_APP
+; BF16-SOFTFP-NEXT: vmov r0, s0
+; BF16-SOFTFP-NEXT: bx lr
+;
+; SIMD-BF16-SOFTFP-LABEL: half_w:
+; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry
+; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0
+; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0
+; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0
+; SIMD-BF16-SOFTFP-NEXT: @APP
+; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0
+; SIMD-BF16-SOFTFP-NEXT: @NO_APP
+; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0
+; SIMD-BF16-SOFTFP-NEXT: bx lr
+;
+; BF16-HARD-LABEL: half_w:
+; BF16-HARD: @ %bb.0: @ %entry
+; BF16-HARD-NEXT: vmov.f16 r0, s0
+; BF16-HARD-NEXT: vmov s0, r0
+; BF16-HARD-NEXT: @APP
+; BF16-HARD-NEXT: vmov.f32 s0, s0
+; BF16-HARD-NEXT: @NO_APP
+; BF16-HARD-NEXT: bx lr
+;
+; SIMD-BF16-HARD-LABEL: half_w:
+; SIMD-BF16-HARD: @ %bb.0: @ %entry
+; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0
+; SIMD-BF16-HARD-NEXT: vmov s0, r0
+; SIMD-BF16-HARD-NEXT: @APP
+; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0
+; SIMD-BF16-HARD-NEXT: @NO_APP
+; SIMD-BF16-HARD-NEXT: bx lr
entry:
%0 = tail call half asm "vmov $0, $1", "=w,w"(half %x)
ret half %0
}
+define half @half_x(half %x) nounwind {
+; NO-FP16-SOFTFP-LABEL: half_x:
+; NO-FP16-SOFTFP: @ %bb.0: @ %entry
+; NO-FP16-SOFTFP-NEXT: vmov s0, r0
+; NO-FP16-SOFTFP-NEXT: @APP
+; NO-FP16-SOFTFP-NEXT: vmov.f32 s0, s0
+; NO-FP16-SOFTFP-NEXT: @NO_APP
+; NO-FP16-SOFTFP-NEXT: vmov r0, s0
+; NO-FP16-SOFTFP-NEXT: bx lr
+;
+; NO-FP16-HARD-LABEL: half_x:
+; NO-FP16-HARD: @ %bb.0: @ %entry
+; NO-FP16-HARD-NEXT: @APP
+; NO-FP16-HARD-NEXT: vmov.f32 s0, s0
+; NO-FP16-HARD-NEXT: @NO_APP
+; NO-FP16-HARD-NEXT: bx lr
+;
+; FP16-SOFTFP-LABEL: half_x:
+; FP16-SOFTFP: @ %bb.0: @ %entry
+; FP16-SOFTFP-NEXT: vmov.f16 s0, r0
+; FP16-SOFTFP-NEXT: vmov.f16 r0, s0
+; FP16-SOFTFP-NEXT: vmov s0, r0
+; FP16-SOFTFP-NEXT: @APP
+; FP16-SOFTFP-NEXT: vmov.f32 s0, s0
+; FP16-SOFTFP-NEXT: @NO_APP
+; FP16-SOFTFP-NEXT: vmov r0, s0
+; FP16-SOFTFP-NEXT: bx lr
+;
+; FP16-HARD-LABEL: half_x:
+; FP16-HARD: @ %bb.0: @ %entry
+; FP16-HARD-NEXT: vmov.f16 r0, s0
+; FP16-HARD-NEXT: vmov s0, r0
+; FP16-HARD-NEXT: @APP
+; FP16-HARD-NEXT: vmov.f32 s0, s0
+; FP16-HARD-NEXT: @NO_APP
+; FP16-HARD-NEXT: bx lr
+;
+; BF16-SOFTFP-LABEL: half_x:
+; BF16-SOFTFP: @ %bb.0: @ %entry
+; BF16-SOFTFP-NEXT: vmov.f16 s0, r0
+; BF16-SOFTFP-NEXT: vmov.f16 r0, s0
+; BF16-SOFTFP-NEXT: vmov s0, r0
+; BF16-SOFTFP-NEXT: @APP
+; BF16-SOFTFP-NEXT: vmov.f32 s0, s0
+; BF16-SOFTFP-NEXT: @NO_APP
+; BF16-SOFTFP-NEXT: vmov r0, s0
+; BF16-SOFTFP-NEXT: bx lr
+;
+; SIMD-BF16-SOFTFP-LABEL: half_x:
+; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry
+; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0
+; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0
+; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0
+; SIMD-BF16-SOFTFP-NEXT: @APP
+; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0
+; SIMD-BF16-SOFTFP-NEXT: @NO_APP
+; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0
+; SIMD-BF16-SOFTFP-NEXT: bx lr
+;
+; BF16-HARD-LABEL: half_x:
+; BF16-HARD: @ %bb.0: @ %entry
+; BF16-HARD-NEXT: vmov.f16 r0, s0
+; BF16-HARD-NEXT: vmov s0, r0
+; BF16-HARD-NEXT: @APP
+; BF16-HARD-NEXT: vmov.f32 s0, s0
+; BF16-HARD-NEXT: @NO_APP
+; BF16-HARD-NEXT: bx lr
+;
+; SIMD-BF16-HARD-LABEL: half_x:
+; SIMD-BF16-HARD: @ %bb.0: @ %entry
+; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0
+; SIMD-BF16-HARD-NEXT: vmov s0, r0
+; SIMD-BF16-HARD-NEXT: @APP
+; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0
+; SIMD-BF16-HARD-NEXT: @NO_APP
+; SIMD-BF16-HARD-NEXT: bx lr
+entry:
+ %0 = tail call half asm "vmov $0, $1", "=x,x"(half %x)
+ ret half %0
+}
+
define bfloat @bf16_t(bfloat %x) nounwind {
; NO-FP16-SOFTFP-LABEL: bf16_t:
; NO-FP16-SOFTFP: @ %bb.0: @ %entry
@@ -127,6 +309,42 @@ define bfloat @bf16_t(bfloat %x) nounwind {
; FP16-HARD-NEXT: vmov.f32 s0, s0
; FP16-HARD-NEXT: @NO_APP
; FP16-HARD-NEXT: bx lr
+;
+; BF16-SOFTFP-LABEL: bf16_t:
+; BF16-SOFTFP: @ %bb.0: @ %entry
+; BF16-SOFTFP-NEXT: vmov s0, r0
+; BF16-SOFTFP-NEXT: @APP
+; BF16-SOFTFP-NEXT: vmov.f32 s0, s0
+; BF16-SOFTFP-NEXT: @NO_APP
+; BF16-SOFTFP-NEXT: vmov r0, s0
+; BF16-SOFTFP-NEXT: bx lr
+;
+; SIMD-BF16-SOFTFP-LABEL: bf16_t:
+; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry
+; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0
+; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0
+; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0
+; SIMD-BF16-SOFTFP-NEXT: @APP
+; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0
+; SIMD-BF16-SOFTFP-NEXT: @NO_APP
+; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0
+; SIMD-BF16-SOFTFP-NEXT: bx lr
+;
+; BF16-HARD-LABEL: bf16_t:
+; BF16-HARD: @ %bb.0: @ %entry
+; BF16-HARD-NEXT: @APP
+; BF16-HARD-NEXT: vmov.f32 s0, s0
+; BF16-HARD-NEXT: @NO_APP
+; BF16-HARD-NEXT: bx lr
+;
+; SIMD-BF16-HARD-LABEL: bf16_t:
+; SIMD-BF16-HARD: @ %bb.0: @ %entry
+; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0
+; SIMD-BF16-HARD-NEXT: vmov s0, r0
+; SIMD-BF16-HARD-NEXT: @APP
+; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0
+; SIMD-BF16-HARD-NEXT: @NO_APP
+; SIMD-BF16-HARD-NEXT: bx lr
entry:
%0 = tail call bfloat asm "vmov $0, $1", "=t,t"(bfloat %x)
ret bfloat %0
@@ -164,10 +382,116 @@ define bfloat @bf16_w(bfloat %x) nounwind {
; FP16-HARD-NEXT: vmov.f32 s0, s0
; FP16-HARD-NEXT: @NO_APP
; FP16-HARD-NEXT: bx lr
+;
+; BF16-SOFTFP-LABEL: bf16_w:
+; BF16-SOFTFP: @ %bb.0: @ %entry
+; BF16-SOFTFP-NEXT: vmov s0, r0
+; BF16-SOFTFP-NEXT: @APP
+; BF16-SOFTFP-NEXT: vmov.f32 s0, s0
+; BF16-SOFTFP-NEXT: @NO_APP
+; BF16-SOFTFP-NEXT: vmov r0, s0
+; BF16-SOFTFP-NEXT: bx lr
+;
+; SIMD-BF16-SOFTFP-LABEL: bf16_w:
+; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry
+; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0
+; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0
+; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0
+; SIMD-BF16-SOFTFP-NEXT: @APP
+; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0
+; SIMD-BF16-SOFTFP-NEXT: @NO_APP
+; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0
+; SIMD-BF16-SOFTFP-NEXT: bx lr
+;
+; BF16-HARD-LABEL: bf16_w:
+; BF16-HARD: @ %bb.0: @ %entry
+; BF16-HARD-NEXT: @APP
+; BF16-HARD-NEXT: vmov.f32 s0, s0
+; BF16-HARD-NEXT: @NO_APP
+; BF16-HARD-NEXT: bx lr
+;
+; SIMD-BF16-HARD-LABEL: bf16_w:
+; SIMD-BF16-HARD: @ %bb.0: @ %entry
+; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0
+; SIMD-BF16-HARD-NEXT: vmov s0, r0
+; SIMD-BF16-HARD-NEXT: @APP
+; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0
+; SIMD-BF16-HARD-NEXT: @NO_APP
+; SIMD-BF16-HARD-NEXT: bx lr
entry:
%0 = tail call bfloat asm "vmov $0, $1", "=w,w"(bfloat %x)
ret bfloat %0
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; BF16-HARD: {{.*}}
-; BF16-SOFTFP: {{.*}}
+
+define bfloat @bf16_x(bfloat %x) nounwind {
+; NO-FP16-SOFTFP-LABEL: bf16_x:
+; NO-FP16-SOFTFP: @ %bb.0: @ %entry
+; NO-FP16-SOFTFP-NEXT: vmov s0, r0
+; NO-FP16-SOFTFP-NEXT: @APP
+; NO-FP16-SOFTFP-NEXT: vmov.f32 s0, s0
+; NO-FP16-SOFTFP-NEXT: @NO_APP
+; NO-FP16-SOFTFP-NEXT: vmov r0, s0
+; NO-FP16-SOFTFP-NEXT: bx lr
+;
+; NO-FP16-HARD-LABEL: bf16_x:
+; NO-FP16-HARD: @ %bb.0: @ %entry
+; NO-FP16-HARD-NEXT: @APP
+; NO-FP16-HARD-NEXT: vmov.f32 s0, s0
+; NO-FP16-HARD-NEXT: @NO_APP
+; NO-FP16-HARD-NEXT: bx lr
+;
+; FP16-SOFTFP-LABEL: bf16_x:
+; FP16-SOFTFP: @ %bb.0: @ %entry
+; FP16-SOFTFP-NEXT: vmov s0, r0
+; FP16-SOFTFP-NEXT: @APP
+; FP16-SOFTFP-NEXT: vmov.f32 s0, s0
+; FP16-SOFTFP-NEXT: @NO_APP
+; FP16-SOFTFP-NEXT: vmov r0, s0
+; FP16-SOFTFP-NEXT: bx lr
+;
+; FP16-HARD-LABEL: bf16_x:
+; FP16-HARD: @ %bb.0: @ %entry
+; FP16-HARD-NEXT: @APP
+; FP16-HARD-NEXT: vmov.f32 s0, s0
+; FP16-HARD-NEXT: @NO_APP
+; FP16-HARD-NEXT: bx lr
+;
+; BF16-SOFTFP-LABEL: bf16_x:
+; BF16-SOFTFP: @ %bb.0: @ %entry
+; BF16-SOFTFP-NEXT: vmov s0, r0
+; BF16-SOFTFP-NEXT: @APP
+; BF16-SOFTFP-NEXT: vmov.f32 s0, s0
+; BF16-SOFTFP-NEXT: @NO_APP
+; BF16-SOFTFP-NEXT: vmov r0, s0
+; BF16-SOFTFP-NEXT: bx lr
+;
+; SIMD-BF16-SOFTFP-LABEL: bf16_x:
+; SIMD-BF16-SOFTFP: @ %bb.0: @ %entry
+; SIMD-BF16-SOFTFP-NEXT: vmov.f16 s0, r0
+; SIMD-BF16-SOFTFP-NEXT: vmov.f16 r0, s0
+; SIMD-BF16-SOFTFP-NEXT: vmov s0, r0
+; SIMD-BF16-SOFTFP-NEXT: @APP
+; SIMD-BF16-SOFTFP-NEXT: vmov.f32 s0, s0
+; SIMD-BF16-SOFTFP-NEXT: @NO_APP
+; SIMD-BF16-SOFTFP-NEXT: vmov r0, s0
+; SIMD-BF16-SOFTFP-NEXT: bx lr
+;
+; BF16-HARD-LABEL: bf16_x:
+; BF16-HARD: @ %bb.0: @ %entry
+; BF16-HARD-NEXT: @APP
+; BF16-HARD-NEXT: vmov.f32 s0, s0
+; BF16-HARD-NEXT: @NO_APP
+; BF16-HARD-NEXT: bx lr
+;
+; SIMD-BF16-HARD-LABEL: bf16_x:
+; SIMD-BF16-HARD: @ %bb.0: @ %entry
+; SIMD-BF16-HARD-NEXT: vmov.f16 r0, s0
+; SIMD-BF16-HARD-NEXT: vmov s0, r0
+; SIMD-BF16-HARD-NEXT: @APP
+; SIMD-BF16-HARD-NEXT: vmov.f32 s0, s0
+; SIMD-BF16-HARD-NEXT: @NO_APP
+; SIMD-BF16-HARD-NEXT: bx lr
+entry:
+ %0 = tail call bfloat asm "vmov $0, $1", "=x,x"(bfloat %x)
+ ret bfloat %0
+}