[llvm] 4f12f47 - [AArch64] Switch to soft promoting half types. (#80576)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 22 02:45:31 PST 2024
Author: Harald van Dijk
Date: 2024-02-22T10:45:27Z
New Revision: 4f12f47550eee85447c9ec37d27a20c6593d3d40
URL: https://github.com/llvm/llvm-project/commit/4f12f47550eee85447c9ec37d27a20c6593d3d40
DIFF: https://github.com/llvm/llvm-project/commit/4f12f47550eee85447c9ec37d27a20c6593d3d40.diff
LOG: [AArch64] Switch to soft promoting half types. (#80576)
The traditional promotion is known to generate wrong code.
Like #80440 for ARM, except that far less is affected: on AArch64,
hardware floating point support always includes FP16 support and is
unaffected by these changes. This only affects `-mgeneral-regs-only`
(Clang) / `-mattr=-fp-armv8` (LLVM).
Because this only affects a configuration where no FP support is
available at all, `useFPRegsForHalfType()` would have no effect and is
therefore not overridden: `f32` parameters and return values were already
being legalized to integers anyway.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 436b21fd134632..bec13484450d78 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1308,6 +1308,8 @@ class AArch64TargetLowering : public TargetLowering {
bool preferScalarizeSplat(SDNode *N) const override;
unsigned getMinimumJumpTableEntries() const override;
+
+ bool softPromoteHalfType() const override { return true; }
};
namespace AArch64 {
diff --git a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
index 37186cf22ccc71..a34f7abcc22a3f 100644
--- a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
+++ b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
@@ -70,22 +70,20 @@ define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
; NOFP16-NEXT: .cfi_offset w22, -32
; NOFP16-NEXT: .cfi_offset w30, -48
; NOFP16-NEXT: mov w21, w0
-; NOFP16-NEXT: and w0, w2, #0xffff
+; NOFP16-NEXT: and w0, w1, #0xffff
; NOFP16-NEXT: mov x19, x3
-; NOFP16-NEXT: mov w20, w1
+; NOFP16-NEXT: mov w20, w2
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: mov w22, w0
; NOFP16-NEXT: and w0, w21, #0xffff
; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w21, w0
+; NOFP16-NEXT: mov w8, w0
; NOFP16-NEXT: and w0, w20, #0xffff
+; NOFP16-NEXT: orr x21, x8, x22, lsl #32
; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w8, w21
-; NOFP16-NEXT: // kill: def $w0 killed $w0 def $x0
-; NOFP16-NEXT: str w22, [x19, #8]
-; NOFP16-NEXT: orr x8, x8, x0, lsl #32
+; NOFP16-NEXT: str x21, [x19]
; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; NOFP16-NEXT: str x8, [x19]
+; NOFP16-NEXT: str w0, [x19, #8]
; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; NOFP16-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
; NOFP16-NEXT: ret
@@ -182,46 +180,17 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
define void @outgoing_v4f16_return(ptr %ptr) #0 {
; NOFP16-LABEL: outgoing_v4f16_return:
; NOFP16: // %bb.0:
-; NOFP16-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
-; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; NOFP16-NEXT: .cfi_def_cfa_offset 48
+; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 16
; NOFP16-NEXT: .cfi_offset w19, -8
-; NOFP16-NEXT: .cfi_offset w20, -16
-; NOFP16-NEXT: .cfi_offset w21, -24
-; NOFP16-NEXT: .cfi_offset w22, -32
-; NOFP16-NEXT: .cfi_offset w23, -40
-; NOFP16-NEXT: .cfi_offset w30, -48
+; NOFP16-NEXT: .cfi_offset w30, -16
; NOFP16-NEXT: mov x19, x0
; NOFP16-NEXT: bl v4f16_result
-; NOFP16-NEXT: and w0, w0, #0xffff
-; NOFP16-NEXT: mov w20, w1
-; NOFP16-NEXT: mov w21, w2
-; NOFP16-NEXT: mov w22, w3
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w23, w0
-; NOFP16-NEXT: and w0, w20, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w20, w0
-; NOFP16-NEXT: and w0, w21, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w21, w0
-; NOFP16-NEXT: and w0, w22, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #6]
-; NOFP16-NEXT: mov w0, w21
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #4]
-; NOFP16-NEXT: mov w0, w20
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #2]
-; NOFP16-NEXT: mov w0, w23
-; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: strh w2, [x19, #4]
+; NOFP16-NEXT: strh w3, [x19, #6]
+; NOFP16-NEXT: strh w1, [x19, #2]
; NOFP16-NEXT: strh w0, [x19]
-; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; NOFP16-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; NOFP16-NEXT: ret
%val = call <4 x half> @v4f16_result()
store <4 x half> %val, ptr %ptr
@@ -231,82 +200,21 @@ define void @outgoing_v4f16_return(ptr %ptr) #0 {
define void @outgoing_v8f16_return(ptr %ptr) #0 {
; NOFP16-LABEL: outgoing_v8f16_return:
; NOFP16: // %bb.0:
-; NOFP16-NEXT: stp x30, x27, [sp, #-80]! // 16-byte Folded Spill
-; NOFP16-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
-; NOFP16-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
-; NOFP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; NOFP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; NOFP16-NEXT: .cfi_def_cfa_offset 80
+; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 16
; NOFP16-NEXT: .cfi_offset w19, -8
-; NOFP16-NEXT: .cfi_offset w20, -16
-; NOFP16-NEXT: .cfi_offset w21, -24
-; NOFP16-NEXT: .cfi_offset w22, -32
-; NOFP16-NEXT: .cfi_offset w23, -40
-; NOFP16-NEXT: .cfi_offset w24, -48
-; NOFP16-NEXT: .cfi_offset w25, -56
-; NOFP16-NEXT: .cfi_offset w26, -64
-; NOFP16-NEXT: .cfi_offset w27, -72
-; NOFP16-NEXT: .cfi_offset w30, -80
+; NOFP16-NEXT: .cfi_offset w30, -16
; NOFP16-NEXT: mov x19, x0
; NOFP16-NEXT: bl v8f16_result
-; NOFP16-NEXT: and w0, w0, #0xffff
-; NOFP16-NEXT: mov w21, w1
-; NOFP16-NEXT: mov w22, w2
-; NOFP16-NEXT: mov w23, w3
-; NOFP16-NEXT: mov w24, w4
-; NOFP16-NEXT: mov w25, w5
-; NOFP16-NEXT: mov w26, w6
-; NOFP16-NEXT: mov w27, w7
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w20, w0
-; NOFP16-NEXT: and w0, w21, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w21, w0
-; NOFP16-NEXT: and w0, w22, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w22, w0
-; NOFP16-NEXT: and w0, w23, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w23, w0
-; NOFP16-NEXT: and w0, w24, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w24, w0
-; NOFP16-NEXT: and w0, w25, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w25, w0
-; NOFP16-NEXT: and w0, w26, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w26, w0
-; NOFP16-NEXT: and w0, w27, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #14]
-; NOFP16-NEXT: mov w0, w26
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #12]
-; NOFP16-NEXT: mov w0, w25
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #10]
-; NOFP16-NEXT: mov w0, w24
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #8]
-; NOFP16-NEXT: mov w0, w23
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #6]
-; NOFP16-NEXT: mov w0, w22
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #4]
-; NOFP16-NEXT: mov w0, w21
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #2]
-; NOFP16-NEXT: mov w0, w20
-; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: strh w5, [x19, #10]
+; NOFP16-NEXT: strh w7, [x19, #14]
+; NOFP16-NEXT: strh w6, [x19, #12]
+; NOFP16-NEXT: strh w4, [x19, #8]
+; NOFP16-NEXT: strh w3, [x19, #6]
+; NOFP16-NEXT: strh w2, [x19, #4]
+; NOFP16-NEXT: strh w1, [x19, #2]
; NOFP16-NEXT: strh w0, [x19]
-; NOFP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; NOFP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; NOFP16-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
-; NOFP16-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
-; NOFP16-NEXT: ldp x30, x27, [sp], #80 // 16-byte Folded Reload
+; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; NOFP16-NEXT: ret
%val = call <8 x half> @v8f16_result()
store <8 x half> %val, ptr %ptr
More information about the llvm-commits
mailing list