[llvm] 744c005 - [AArch64][CodeGen] Fix crash when fptrunc returns fp16 with +nofp attr (#81724)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 22 11:15:56 PST 2024
Author: Nashe Mncube
Date: 2024-02-22T19:15:52Z
New Revision: 744c0057e7dc0d1d046a4867cece2f31fee9bb23
URL: https://github.com/llvm/llvm-project/commit/744c0057e7dc0d1d046a4867cece2f31fee9bb23
DIFF: https://github.com/llvm/llvm-project/commit/744c0057e7dc0d1d046a4867cece2f31fee9bb23.diff
LOG: [AArch64][CodeGen] Fix crash when fptrunc returns fp16 with +nofp attr (#81724)
When lowering the fptrunc opcode returning fp16 with the +nofp flag
enabled, we could trigger a compiler crash because no custom lowering
was implemented. This patch handles the case in which we need to
promote an fp16 return type for fptrunc when the +nofp attr is enabled.
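For reference, a minimal reproducer (taken from the new test added below; the
file name is illustrative) that previously crashed llc and now lowers to a
__gnu_f2h_ieee libcall:

  ; llc -mtriple=aarch64 -mattr=-fp-armv8 -o - reproducer.ll
  define half @f2h(float %a) {
  entry:
    %0 = fptrunc float %a to half
    ret half %0
  }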
Added:
llvm/test/CodeGen/AArch64/16bit-float-promotion-with-nofp.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 184ebc19bc9ede..3b92e95d7c2876 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -541,10 +541,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
+ if (Subtarget->hasFPARMv8())
+ setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
+ if (Subtarget->hasFPARMv8())
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
@@ -947,9 +949,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
- setOperationAction(ISD::BITCAST, MVT::i16, Custom);
- setOperationAction(ISD::BITCAST, MVT::f16, Custom);
- setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
+ if (Subtarget->hasFPARMv8()) {
+ setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::f16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
+ }
// Indexed loads and stores are supported.
for (unsigned im = (unsigned)ISD::PRE_INC;
diff --git a/llvm/test/CodeGen/AArch64/16bit-float-promotion-with-nofp.ll b/llvm/test/CodeGen/AArch64/16bit-float-promotion-with-nofp.ll
new file mode 100644
index 00000000000000..bfe9ab8424bb03
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/16bit-float-promotion-with-nofp.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -mattr=-fp-armv8 -o - %s | FileCheck %s
+
+define half @f2h(float %a) {
+; CHECK-LABEL: f2h:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %0 = fptrunc float %a to half
+ ret half %0
+}
+
+define bfloat @f2bfloat(float %a) {
+; CHECK-LABEL: f2bfloat:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __truncsfbf2
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %0 = fptrunc float %a to bfloat
+ ret bfloat %0
+}
+
diff --git a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
index a34f7abcc22a3f..9fa5208cc8db68 100644
--- a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
+++ b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
@@ -131,26 +131,107 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
ret void
}
-; FIXME:
-; define half @f16_return(float %arg) #0 {
-; %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-; ret half %fptrunc
-; }
+ define half @f16_return(float %arg) #0 {
+; NOFP16-LABEL: f16_return:
+; NOFP16: // %bb.0:
+; NOFP16-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 16
+; NOFP16-NEXT: .cfi_offset w30, -16
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; NOFP16-NEXT: ret
+ %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret half %fptrunc
+ }
-; define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
-; %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-; ret <2 x half> %fptrunc
-; }
+ define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
+; NOFP16-LABEL: v2f16_return:
+; NOFP16: // %bb.0:
+; NOFP16-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; NOFP16-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 32
+; NOFP16-NEXT: .cfi_offset w19, -8
+; NOFP16-NEXT: .cfi_offset w20, -16
+; NOFP16-NEXT: .cfi_offset w30, -32
+; NOFP16-NEXT: mov w19, w0
+; NOFP16-NEXT: mov w0, w1
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w20, w0
+; NOFP16-NEXT: mov w0, w19
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w1, w20
+; NOFP16-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; NOFP16-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; NOFP16-NEXT: ret
+ %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret <2 x half> %fptrunc
+ }
-; define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
-; %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-; ret <3 x half> %fptrunc
-; }
+ define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
+; NOFP16-LABEL: v3f16_return:
+; NOFP16: // %bb.0:
+; NOFP16-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; NOFP16-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 32
+; NOFP16-NEXT: .cfi_offset w19, -8
+; NOFP16-NEXT: .cfi_offset w20, -16
+; NOFP16-NEXT: .cfi_offset w21, -24
+; NOFP16-NEXT: .cfi_offset w30, -32
+; NOFP16-NEXT: mov w20, w0
+; NOFP16-NEXT: mov w0, w2
+; NOFP16-NEXT: mov w19, w1
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w21, w0
+; NOFP16-NEXT: mov w0, w19
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w19, w0
+; NOFP16-NEXT: mov w0, w20
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w1, w19
+; NOFP16-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; NOFP16-NEXT: mov w2, w21
+; NOFP16-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; NOFP16-NEXT: ret
+ %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret <3 x half> %fptrunc
+ }
-; define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
-; %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-; ret <4 x half> %fptrunc
-; }
+ define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
+; NOFP16-LABEL: v4f16_return:
+; NOFP16: // %bb.0:
+; NOFP16-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
+; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 48
+; NOFP16-NEXT: .cfi_offset w19, -8
+; NOFP16-NEXT: .cfi_offset w20, -16
+; NOFP16-NEXT: .cfi_offset w21, -24
+; NOFP16-NEXT: .cfi_offset w22, -32
+; NOFP16-NEXT: .cfi_offset w30, -48
+; NOFP16-NEXT: mov w21, w0
+; NOFP16-NEXT: mov w0, w3
+; NOFP16-NEXT: mov w19, w2
+; NOFP16-NEXT: mov w20, w1
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w22, w0
+; NOFP16-NEXT: mov w0, w19
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w19, w0
+; NOFP16-NEXT: mov w0, w20
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w20, w0
+; NOFP16-NEXT: mov w0, w21
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w1, w20
+; NOFP16-NEXT: mov w2, w19
+; NOFP16-NEXT: mov w3, w22
+; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
+; NOFP16-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
+; NOFP16-NEXT: ret
+ %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret <4 x half> %fptrunc
+ }
; FIXME:
; define void @outgoing_f16_arg(ptr %ptr) #0 {