[llvm] [AArch64] Switch to soft promoting half types. (PR #80576)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 3 18:47:48 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Harald van Dijk (hvdijk)
<details>
<summary>Changes</summary>
The traditional promotion is known to generate wrong code.
This is like #80440 for ARM, except that far less is affected: on AArch64, hardware floating-point support always includes FP16 support, so configurations with hardware floating point are unaffected by these changes. This only affects `-mgeneral-regs-only` (Clang) / `-mattr=-fp-armv8` (LLVM).
---
Full diff: https://github.com/llvm/llvm-project/pull/80576.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+4)
- (modified) llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll (+24-116)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 436b21fd13463..160eafab8c364 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1308,6 +1308,10 @@ class AArch64TargetLowering : public TargetLowering {
bool preferScalarizeSplat(SDNode *N) const override;
unsigned getMinimumJumpTableEntries() const override;
+
+ bool softPromoteHalfType() const override { return true; }
+
+ bool useFPRegsForHalfType() const override { return true; }
};
namespace AArch64 {
diff --git a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
index 37186cf22ccc7..a34f7abcc22a3 100644
--- a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
+++ b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
@@ -70,22 +70,20 @@ define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
; NOFP16-NEXT: .cfi_offset w22, -32
; NOFP16-NEXT: .cfi_offset w30, -48
; NOFP16-NEXT: mov w21, w0
-; NOFP16-NEXT: and w0, w2, #0xffff
+; NOFP16-NEXT: and w0, w1, #0xffff
; NOFP16-NEXT: mov x19, x3
-; NOFP16-NEXT: mov w20, w1
+; NOFP16-NEXT: mov w20, w2
; NOFP16-NEXT: bl __gnu_h2f_ieee
; NOFP16-NEXT: mov w22, w0
; NOFP16-NEXT: and w0, w21, #0xffff
; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w21, w0
+; NOFP16-NEXT: mov w8, w0
; NOFP16-NEXT: and w0, w20, #0xffff
+; NOFP16-NEXT: orr x21, x8, x22, lsl #32
; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w8, w21
-; NOFP16-NEXT: // kill: def $w0 killed $w0 def $x0
-; NOFP16-NEXT: str w22, [x19, #8]
-; NOFP16-NEXT: orr x8, x8, x0, lsl #32
+; NOFP16-NEXT: str x21, [x19]
; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; NOFP16-NEXT: str x8, [x19]
+; NOFP16-NEXT: str w0, [x19, #8]
; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; NOFP16-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
; NOFP16-NEXT: ret
@@ -182,46 +180,17 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
define void @outgoing_v4f16_return(ptr %ptr) #0 {
; NOFP16-LABEL: outgoing_v4f16_return:
; NOFP16: // %bb.0:
-; NOFP16-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
-; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; NOFP16-NEXT: .cfi_def_cfa_offset 48
+; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 16
; NOFP16-NEXT: .cfi_offset w19, -8
-; NOFP16-NEXT: .cfi_offset w20, -16
-; NOFP16-NEXT: .cfi_offset w21, -24
-; NOFP16-NEXT: .cfi_offset w22, -32
-; NOFP16-NEXT: .cfi_offset w23, -40
-; NOFP16-NEXT: .cfi_offset w30, -48
+; NOFP16-NEXT: .cfi_offset w30, -16
; NOFP16-NEXT: mov x19, x0
; NOFP16-NEXT: bl v4f16_result
-; NOFP16-NEXT: and w0, w0, #0xffff
-; NOFP16-NEXT: mov w20, w1
-; NOFP16-NEXT: mov w21, w2
-; NOFP16-NEXT: mov w22, w3
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w23, w0
-; NOFP16-NEXT: and w0, w20, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w20, w0
-; NOFP16-NEXT: and w0, w21, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w21, w0
-; NOFP16-NEXT: and w0, w22, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #6]
-; NOFP16-NEXT: mov w0, w21
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #4]
-; NOFP16-NEXT: mov w0, w20
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #2]
-; NOFP16-NEXT: mov w0, w23
-; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: strh w2, [x19, #4]
+; NOFP16-NEXT: strh w3, [x19, #6]
+; NOFP16-NEXT: strh w1, [x19, #2]
; NOFP16-NEXT: strh w0, [x19]
-; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; NOFP16-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; NOFP16-NEXT: ret
%val = call <4 x half> @v4f16_result()
store <4 x half> %val, ptr %ptr
@@ -231,82 +200,21 @@ define void @outgoing_v4f16_return(ptr %ptr) #0 {
define void @outgoing_v8f16_return(ptr %ptr) #0 {
; NOFP16-LABEL: outgoing_v8f16_return:
; NOFP16: // %bb.0:
-; NOFP16-NEXT: stp x30, x27, [sp, #-80]! // 16-byte Folded Spill
-; NOFP16-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
-; NOFP16-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
-; NOFP16-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; NOFP16-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; NOFP16-NEXT: .cfi_def_cfa_offset 80
+; NOFP16-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 16
; NOFP16-NEXT: .cfi_offset w19, -8
-; NOFP16-NEXT: .cfi_offset w20, -16
-; NOFP16-NEXT: .cfi_offset w21, -24
-; NOFP16-NEXT: .cfi_offset w22, -32
-; NOFP16-NEXT: .cfi_offset w23, -40
-; NOFP16-NEXT: .cfi_offset w24, -48
-; NOFP16-NEXT: .cfi_offset w25, -56
-; NOFP16-NEXT: .cfi_offset w26, -64
-; NOFP16-NEXT: .cfi_offset w27, -72
-; NOFP16-NEXT: .cfi_offset w30, -80
+; NOFP16-NEXT: .cfi_offset w30, -16
; NOFP16-NEXT: mov x19, x0
; NOFP16-NEXT: bl v8f16_result
-; NOFP16-NEXT: and w0, w0, #0xffff
-; NOFP16-NEXT: mov w21, w1
-; NOFP16-NEXT: mov w22, w2
-; NOFP16-NEXT: mov w23, w3
-; NOFP16-NEXT: mov w24, w4
-; NOFP16-NEXT: mov w25, w5
-; NOFP16-NEXT: mov w26, w6
-; NOFP16-NEXT: mov w27, w7
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w20, w0
-; NOFP16-NEXT: and w0, w21, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w21, w0
-; NOFP16-NEXT: and w0, w22, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w22, w0
-; NOFP16-NEXT: and w0, w23, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w23, w0
-; NOFP16-NEXT: and w0, w24, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w24, w0
-; NOFP16-NEXT: and w0, w25, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w25, w0
-; NOFP16-NEXT: and w0, w26, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: mov w26, w0
-; NOFP16-NEXT: and w0, w27, #0xffff
-; NOFP16-NEXT: bl __gnu_h2f_ieee
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #14]
-; NOFP16-NEXT: mov w0, w26
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #12]
-; NOFP16-NEXT: mov w0, w25
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #10]
-; NOFP16-NEXT: mov w0, w24
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #8]
-; NOFP16-NEXT: mov w0, w23
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #6]
-; NOFP16-NEXT: mov w0, w22
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #4]
-; NOFP16-NEXT: mov w0, w21
-; NOFP16-NEXT: bl __gnu_f2h_ieee
-; NOFP16-NEXT: strh w0, [x19, #2]
-; NOFP16-NEXT: mov w0, w20
-; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: strh w5, [x19, #10]
+; NOFP16-NEXT: strh w7, [x19, #14]
+; NOFP16-NEXT: strh w6, [x19, #12]
+; NOFP16-NEXT: strh w4, [x19, #8]
+; NOFP16-NEXT: strh w3, [x19, #6]
+; NOFP16-NEXT: strh w2, [x19, #4]
+; NOFP16-NEXT: strh w1, [x19, #2]
; NOFP16-NEXT: strh w0, [x19]
-; NOFP16-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; NOFP16-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; NOFP16-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
-; NOFP16-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
-; NOFP16-NEXT: ldp x30, x27, [sp], #80 // 16-byte Folded Reload
+; NOFP16-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; NOFP16-NEXT: ret
%val = call <8 x half> @v8f16_result()
store <8 x half> %val, ptr %ptr
``````````
</details>
https://github.com/llvm/llvm-project/pull/80576
More information about the llvm-commits mailing list