[llvm] 4472648 - [ARM] Expand bf16 expanding/rounding fp loads/stores
David Green via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 20 01:03:34 PST 2024
Author: David Green
Date: 2024-12-20T09:03:28Z
New Revision: 44726489988a27c3cbd2f94188a2363e2080e045
URL: https://github.com/llvm/llvm-project/commit/44726489988a27c3cbd2f94188a2363e2080e045
DIFF: https://github.com/llvm/llvm-project/commit/44726489988a27c3cbd2f94188a2363e2080e045.diff
LOG: [ARM] Expand bf16 expanding/rounding fp loads/stores
As with other fp types, extending loads from bf16 and truncating stores to bf16
should be expanded to prevent the creation of nodes that are illegal for Arm.
Added:
Modified:
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/Thumb2/bf16-instructions.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 92df91534fe07f..5ec2d8389c18e5 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1113,12 +1113,15 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
}
// ... or truncating stores
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
// ARM does not have i1 sign extending load.
for (MVT VT : MVT::integer_valuetypes())
diff --git a/llvm/test/CodeGen/Thumb2/bf16-instructions.ll b/llvm/test/CodeGen/Thumb2/bf16-instructions.ll
index 11c9c6028d342e..5de7afca25b849 100644
--- a/llvm/test/CodeGen/Thumb2/bf16-instructions.ll
+++ b/llvm/test/CodeGen/Thumb2/bf16-instructions.ll
@@ -220,6 +220,55 @@ define void @test_store(bfloat %a, ptr %b) {
ret void
}
+define void @test_truncstore32(float %a, ptr %b) {
+; CHECK-NOFP-LABEL: test_truncstore32:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r4, lr}
+; CHECK-NOFP-NEXT: push {r4, lr}
+; CHECK-NOFP-NEXT: mov r4, r1
+; CHECK-NOFP-NEXT: bl __truncsfbf2
+; CHECK-NOFP-NEXT: strh r0, [r4]
+; CHECK-NOFP-NEXT: pop {r4, pc}
+;
+; CHECK-FP-LABEL: test_truncstore32:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r4, lr}
+; CHECK-FP-NEXT: push {r4, lr}
+; CHECK-FP-NEXT: mov r4, r0
+; CHECK-FP-NEXT: bl __truncsfbf2
+; CHECK-FP-NEXT: vmov r0, s0
+; CHECK-FP-NEXT: strh r0, [r4]
+; CHECK-FP-NEXT: pop {r4, pc}
+ %r = fptrunc float %a to bfloat
+ store bfloat %r, ptr %b
+ ret void
+}
+
+define void @test_truncstore64(double %a, ptr %b) {
+; CHECK-NOFP-LABEL: test_truncstore64:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r4, lr}
+; CHECK-NOFP-NEXT: push {r4, lr}
+; CHECK-NOFP-NEXT: mov r4, r2
+; CHECK-NOFP-NEXT: bl __truncdfbf2
+; CHECK-NOFP-NEXT: strh r0, [r4]
+; CHECK-NOFP-NEXT: pop {r4, pc}
+;
+; CHECK-FP-LABEL: test_truncstore64:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r4, lr}
+; CHECK-FP-NEXT: push {r4, lr}
+; CHECK-FP-NEXT: mov r4, r0
+; CHECK-FP-NEXT: vmov r0, r1, d0
+; CHECK-FP-NEXT: bl __aeabi_d2f
+; CHECK-FP-NEXT: lsrs r0, r0, #16
+; CHECK-FP-NEXT: strh r0, [r4]
+; CHECK-FP-NEXT: pop {r4, pc}
+ %r = fptrunc double %a to bfloat
+ store bfloat %r, ptr %b
+ ret void
+}
+
define bfloat @test_load(ptr %a) {
; CHECK-NOFP-LABEL: test_load:
; CHECK-NOFP: @ %bb.0:
@@ -235,6 +284,48 @@ define bfloat @test_load(ptr %a) {
ret bfloat %r
}
+define float @test_loadext32(ptr %a) {
+; CHECK-NOFP-LABEL: test_loadext32:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: ldrh r0, [r0]
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bx lr
+;
+; CHECK-FP-LABEL: test_loadext32:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: ldrh r0, [r0]
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: vmov s0, r0
+; CHECK-FP-NEXT: bx lr
+ %r = load bfloat, ptr %a
+ %d = fpext bfloat %r to float
+ ret float %d
+}
+
+define double @test_loadext64(ptr %a) {
+; CHECK-NOFP-LABEL: test_loadext64:
+; CHECK-NOFP: @ %bb.0:
+; CHECK-NOFP-NEXT: .save {r7, lr}
+; CHECK-NOFP-NEXT: push {r7, lr}
+; CHECK-NOFP-NEXT: ldrh r0, [r0]
+; CHECK-NOFP-NEXT: lsls r0, r0, #16
+; CHECK-NOFP-NEXT: bl __aeabi_f2d
+; CHECK-NOFP-NEXT: pop {r7, pc}
+;
+; CHECK-FP-LABEL: test_loadext64:
+; CHECK-FP: @ %bb.0:
+; CHECK-FP-NEXT: .save {r7, lr}
+; CHECK-FP-NEXT: push {r7, lr}
+; CHECK-FP-NEXT: ldrh r0, [r0]
+; CHECK-FP-NEXT: lsls r0, r0, #16
+; CHECK-FP-NEXT: bl __aeabi_f2d
+; CHECK-FP-NEXT: vmov d0, r0, r1
+; CHECK-FP-NEXT: pop {r7, pc}
+ %r = load bfloat, ptr %a
+ %d = fpext bfloat %r to double
+ ret double %d
+}
+
declare bfloat @test_callee(bfloat %a, bfloat %b)
define bfloat @test_call(bfloat %a, bfloat %b) {
@@ -867,8 +958,8 @@ define void @test_fccmp(bfloat %in, ptr %out) {
; CHECK-FP-LABEL: test_fccmp:
; CHECK-FP: @ %bb.0:
; CHECK-FP-NEXT: vmov r1, s0
-; CHECK-FP-NEXT: vldr s0, .LCPI30_0
-; CHECK-FP-NEXT: vldr s4, .LCPI30_1
+; CHECK-FP-NEXT: vldr s0, .LCPI34_0
+; CHECK-FP-NEXT: vldr s4, .LCPI34_1
; CHECK-FP-NEXT: lsls r2, r1, #16
; CHECK-FP-NEXT: vmov s2, r2
; CHECK-FP-NEXT: mov.w r2, #17664
@@ -882,9 +973,9 @@ define void @test_fccmp(bfloat %in, ptr %out) {
; CHECK-FP-NEXT: bx lr
; CHECK-FP-NEXT: .p2align 2
; CHECK-FP-NEXT: @ %bb.1:
-; CHECK-FP-NEXT: .LCPI30_0:
+; CHECK-FP-NEXT: .LCPI34_0:
; CHECK-FP-NEXT: .long 0x45000000 @ float 2048
-; CHECK-FP-NEXT: .LCPI30_1:
+; CHECK-FP-NEXT: .LCPI34_1:
; CHECK-FP-NEXT: .long 0x48000000 @ float 131072
%cmp1 = fcmp ogt bfloat %in, 0xR4800
%cmp2 = fcmp olt bfloat %in, 0xR4500
@@ -941,14 +1032,14 @@ define bfloat @test_phi(ptr %p1) {
; CHECK-NOFP-NEXT: push {r4, r5, r6, lr}
; CHECK-NOFP-NEXT: ldrh r6, [r0]
; CHECK-NOFP-NEXT: mov r4, r0
-; CHECK-NOFP-NEXT: .LBB32_1: @ %loop
+; CHECK-NOFP-NEXT: .LBB36_1: @ %loop
; CHECK-NOFP-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NOFP-NEXT: mov r0, r4
; CHECK-NOFP-NEXT: mov r5, r6
; CHECK-NOFP-NEXT: ldrh r6, [r4]
; CHECK-NOFP-NEXT: bl test_dummy
; CHECK-NOFP-NEXT: lsls r0, r0, #31
-; CHECK-NOFP-NEXT: bne .LBB32_1
+; CHECK-NOFP-NEXT: bne .LBB36_1
; CHECK-NOFP-NEXT: @ %bb.2: @ %return
; CHECK-NOFP-NEXT: mov r0, r5
; CHECK-NOFP-NEXT: pop {r4, r5, r6, pc}
@@ -962,7 +1053,7 @@ define bfloat @test_phi(ptr %p1) {
; CHECK-FP-NEXT: mov r4, r0
; CHECK-FP-NEXT: ldrh r0, [r0]
; CHECK-FP-NEXT: vmov s18, r0
-; CHECK-FP-NEXT: .LBB32_1: @ %loop
+; CHECK-FP-NEXT: .LBB36_1: @ %loop
; CHECK-FP-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-FP-NEXT: ldrh r0, [r4]
; CHECK-FP-NEXT: vmov.f32 s16, s18
@@ -970,7 +1061,7 @@ define bfloat @test_phi(ptr %p1) {
; CHECK-FP-NEXT: mov r0, r4
; CHECK-FP-NEXT: bl test_dummy
; CHECK-FP-NEXT: lsls r0, r0, #31
-; CHECK-FP-NEXT: bne .LBB32_1
+; CHECK-FP-NEXT: bne .LBB36_1
; CHECK-FP-NEXT: @ %bb.2: @ %return
; CHECK-FP-NEXT: vmov.f32 s0, s16
; CHECK-FP-NEXT: vpop {d8, d9}
More information about the llvm-commits mailing list