[llvm] [AArch64][CodeGen] Fix crash when fptrunc returns fp16 with +nofp attr (PR #81724)
Nashe Mncube via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 22 07:58:11 PST 2024
https://github.com/nasherm updated https://github.com/llvm/llvm-project/pull/81724
>From 9e2362f50791778536425a2989cfe5859cfcd115 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Wed, 14 Feb 2024 10:56:47 +0000
Subject: [PATCH 1/8] [AArch64][CodeGen] Fix crash when fptrunc returns fp16
with +nofp attr
When lowering an fptrunc that returns fp16 with the +nofp attribute
enabled, the compiler could crash.
This is because no custom lowering was implemented for this case. This
patch implements a custom lowering that promotes the fp16 return type
of fptrunc when the +nofp attribute is enabled.
Change-Id: Ibea20a676d40fde3f25e1ade365620071f46ff2b
---
.../Target/AArch64/AArch64ISelLowering.cpp | 9 ++++++++
.../AArch64/float16-promotion-with-nofp.ll | 21 +++++++++++++++++++
2 files changed, 30 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 184ebc19bc9ede..ec472d3d256a8a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25043,6 +25043,15 @@ void AArch64TargetLowering::ReplaceNodeResults(
Results.push_back(
LowerToPredicatedOp(SDValue(N, 0), DAG, AArch64ISD::MULHU_PRED));
return;
+ case ISD::FP_ROUND:{
+ if (N->getValueType(0) == MVT::f16 && !Subtarget->hasFullFP16()) {
+ // Promote fp16 result to legal type
+ SDLoc DL(N);
+ EVT NVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ Results.push_back(DAG.getNode(ISD::FP16_TO_FP, DL, NVT, N->getOperand(0)));
+ }
+ return;
+ }
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_SINT:
diff --git a/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll b/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
new file mode 100644
index 00000000000000..03426579131a1d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mcpu=cortex-r82 -O1 -o - %s | FileCheck %s
+
+; Source used:
+; __fp16 f2h(float a) { return a; }
+; Compiled with: clang --target=aarch64-arm-none-eabi -march=armv8-r+nofp
+
+define hidden noundef nofpclass(nan inf) half @f2h(float noundef nofpclass(nan inf) %a) local_unnamed_addr #0 {
+;CHECK: f2h: // @f2h
+;CHECK-NEXT: // %bb.0: // %entry
+;CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+;CHECK-NEXT: bl __gnu_h2f_ieee
+;CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+;CHECK-NEXT: ret
+entry:
+ %0 = fptrunc float %a to half
+ ret half %0
+}
+
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "denormal-fp-math"="preserve-sign,preserve-sign" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+crc,+lse,+pauth,+ras,+rcpc,+sb,+ssbs,+v8r,-complxnum,-dotprod,-fmv,-fp-armv8,-fp16fml,-fullfp16,-jsconv,-neon,-rdm" }
+
>From a70a40cb34299dcf33ec164b9e328f99cf453552 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Wed, 14 Feb 2024 11:11:29 +0000
Subject: [PATCH 2/8] clang-format fixes
Change-Id: Icf71578773b4c44fe8d79edd984661ec79fe1b09
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ec472d3d256a8a..87220b7c2737bf 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25043,12 +25043,13 @@ void AArch64TargetLowering::ReplaceNodeResults(
Results.push_back(
LowerToPredicatedOp(SDValue(N, 0), DAG, AArch64ISD::MULHU_PRED));
return;
- case ISD::FP_ROUND:{
+ case ISD::FP_ROUND: {
if (N->getValueType(0) == MVT::f16 && !Subtarget->hasFullFP16()) {
- // Promote fp16 result to legal type
- SDLoc DL(N);
- EVT NVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- Results.push_back(DAG.getNode(ISD::FP16_TO_FP, DL, NVT, N->getOperand(0)));
+ // Promote fp16 result to legal type
+ SDLoc DL(N);
+ EVT NVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ Results.push_back(
+ DAG.getNode(ISD::FP16_TO_FP, DL, NVT, N->getOperand(0)));
}
return;
}
>From 752605f543953627163fc30436a0173b93df4171 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Wed, 14 Feb 2024 16:56:53 +0000
Subject: [PATCH 3/8] Responding to review comments
Change-Id: Ic64506bb76bcc8f059b1de9ea6041fe8c0093b9c
---
.../Target/AArch64/AArch64ISelLowering.cpp | 5 ++--
.../AArch64/float16-promotion-with-nofp.ll | 26 ++++++++-----------
2 files changed, 13 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 87220b7c2737bf..0447f773f0f607 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1880,6 +1880,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT,
}
// Lower fixed length vector operations to scalable equivalents.
+ setOperationAction(ISD::ANY_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::AND, VT, Custom);
@@ -25045,11 +25046,9 @@ void AArch64TargetLowering::ReplaceNodeResults(
return;
case ISD::FP_ROUND: {
if (N->getValueType(0) == MVT::f16 && !Subtarget->hasFullFP16()) {
- // Promote fp16 result to legal type
SDLoc DL(N);
- EVT NVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
Results.push_back(
- DAG.getNode(ISD::FP16_TO_FP, DL, NVT, N->getOperand(0)));
+ DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i16, N->getOperand(0)));
}
return;
}
diff --git a/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll b/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
index 03426579131a1d..714cbcc1fdb6db 100644
--- a/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
+++ b/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
@@ -1,21 +1,17 @@
-; RUN: llc -mcpu=cortex-r82 -O1 -o - %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -mattr=-fp-armv8 -o - %s | FileCheck %s
-; Source used:
-; __fp16 f2h(float a) { return a; }
-; Compiled with: clang --target=aarch64-arm-none-eabi -march=armv8-r+nofp
-
-define hidden noundef nofpclass(nan inf) half @f2h(float noundef nofpclass(nan inf) %a) local_unnamed_addr #0 {
-;CHECK: f2h: // @f2h
-;CHECK-NEXT: // %bb.0: // %entry
-;CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-;CHECK-NEXT: bl __gnu_h2f_ieee
-;CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-;CHECK-NEXT: ret
+define half @f2h(float %a) {
+; CHECK-LABEL: f2h:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
entry:
%0 = fptrunc float %a to half
ret half %0
}
-
-attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "denormal-fp-math"="preserve-sign,preserve-sign" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+crc,+lse,+pauth,+ras,+rcpc,+sb,+ssbs,+v8r,-complxnum,-dotprod,-fmv,-fp-armv8,-fp16fml,-fullfp16,-jsconv,-neon,-rdm" }
-
>From e88f7938957e0058b647ffc039b19bc7357806e6 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Thu, 15 Feb 2024 13:46:26 +0000
Subject: [PATCH 4/8] Responding to review comments
Change-Id: Ib18e0ceeaab18d31d3fc43daab838ea95d62c2c1
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 17 ++++++++---------
.../AArch64/float16-promotion-with-nofp.ll | 3 +++
2 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0447f773f0f607..3aeeff7ad72a33 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -541,12 +541,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
+ if (!Subtarget->hasFullFP16())
+ setOperationAction(ISD::FP_ROUND, MVT::f16, Expand);
+ else
+ setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
@@ -24616,6 +24619,10 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
EVT VT = N->getValueType(0);
EVT SrcVT = Op.getValueType();
+ // Default to the generic legalizer
+ if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16())
+ return;
+
if (VT == MVT::v2i16 && SrcVT == MVT::i32) {
CustomNonLegalBITCASTResults(N, Results, DAG, MVT::v2i32, MVT::v4i16);
return;
@@ -25044,14 +25051,6 @@ void AArch64TargetLowering::ReplaceNodeResults(
Results.push_back(
LowerToPredicatedOp(SDValue(N, 0), DAG, AArch64ISD::MULHU_PRED));
return;
- case ISD::FP_ROUND: {
- if (N->getValueType(0) == MVT::f16 && !Subtarget->hasFullFP16()) {
- SDLoc DL(N);
- Results.push_back(
- DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i16, N->getOperand(0)));
- }
- return;
- }
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_SINT:
diff --git a/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll b/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
index 714cbcc1fdb6db..e3c72883976fca 100644
--- a/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
+++ b/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
@@ -8,6 +8,9 @@ define half @f2h(float %a) {
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: and w0, w0, #0xffff
+; CHECK-NEXT: bl __gnu_h2f_ieee
+; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
>From ea9475e327d47174400b321515903e19330a4aad Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Mon, 19 Feb 2024 11:37:05 +0000
Subject: [PATCH 5/8] Using hasFPARMv8 over hasFullFP16
Change-Id: I92af1dc9413486d2c20d90aebf0e377b077ad428
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3aeeff7ad72a33..f8488bd548c09d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -546,9 +546,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
- if (!Subtarget->hasFullFP16())
- setOperationAction(ISD::FP_ROUND, MVT::f16, Expand);
- else
+
+ if (Subtarget->hasFPARMv8())
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
@@ -1883,7 +1882,6 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT,
}
// Lower fixed length vector operations to scalable equivalents.
- setOperationAction(ISD::ANY_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::AND, VT, Custom);
@@ -24620,7 +24618,7 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
EVT SrcVT = Op.getValueType();
// Default to the generic legalizer
- if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16())
+ if (SrcVT == MVT::f16 && !Subtarget->hasFPARMv8())
return;
if (VT == MVT::v2i16 && SrcVT == MVT::i32) {
>From fb775fbf256efff0f8acf1498557dc8c4362af7a Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Wed, 21 Feb 2024 16:37:41 +0000
Subject: [PATCH 6/8] Responding to review comments
Change-Id: Ic1b7f8774ae529680597b4e032c4a63416ac927a
---
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 11 +++++++----
...fp.ll => 16bit-float-promotion-with-nofp.ll} | 17 ++++++++++++++---
2 files changed, 21 insertions(+), 7 deletions(-)
rename llvm/test/CodeGen/AArch64/{float16-promotion-with-nofp.ll => 16bit-float-promotion-with-nofp.ll} (57%)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f8488bd548c09d..20d492b707365b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -547,8 +547,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
- if (Subtarget->hasFPARMv8())
- setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
+ if (Subtarget->hasFPARMv8()) {
+ setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::f16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
+ }
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
@@ -24617,8 +24620,8 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
EVT VT = N->getValueType(0);
EVT SrcVT = Op.getValueType();
- // Default to the generic legalizer
- if (SrcVT == MVT::f16 && !Subtarget->hasFPARMv8())
+ if (!Subtarget->hasFPARMv8() &&
+ (SrcVT == MVT::f16 || SrcVT == MVT::i16 || SrcVT == MVT::bf16))
return;
if (VT == MVT::v2i16 && SrcVT == MVT::i32) {
diff --git a/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll b/llvm/test/CodeGen/AArch64/16bit-float-promotion-with-nofp.ll
similarity index 57%
rename from llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
rename to llvm/test/CodeGen/AArch64/16bit-float-promotion-with-nofp.ll
index e3c72883976fca..bfe9ab8424bb03 100644
--- a/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
+++ b/llvm/test/CodeGen/AArch64/16bit-float-promotion-with-nofp.ll
@@ -8,9 +8,6 @@ define half @f2h(float %a) {
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: bl __gnu_f2h_ieee
-; CHECK-NEXT: and w0, w0, #0xffff
-; CHECK-NEXT: bl __gnu_h2f_ieee
-; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
entry:
@@ -18,3 +15,17 @@ entry:
ret half %0
}
+define bfloat @f2bfloat(float %a) {
+; CHECK-LABEL: f2bfloat:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __truncsfbf2
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %0 = fptrunc float %a to bfloat
+ ret bfloat %0
+}
+
>From 46a77e3f05b3060f0ad20873da93fd966a6752e0 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Thu, 22 Feb 2024 12:03:01 +0000
Subject: [PATCH 7/8] Rebasing onto similar PR
Change-Id: Icdb7c3742dc30da0652701ffc6c7468b8504e416
---
.../Target/AArch64/AArch64ISelLowering.cpp | 23 ++++++++-----------
1 file changed, 9 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 20d492b707365b..3b92e95d7c2876 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -541,18 +541,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
+ if (Subtarget->hasFPARMv8())
+ setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
- setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
+ if (Subtarget->hasFPARMv8())
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
- if (Subtarget->hasFPARMv8()) {
- setOperationAction(ISD::BITCAST, MVT::i16, Custom);
- setOperationAction(ISD::BITCAST, MVT::f16, Custom);
- setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
- }
-
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
@@ -952,9 +949,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
- setOperationAction(ISD::BITCAST, MVT::i16, Custom);
- setOperationAction(ISD::BITCAST, MVT::f16, Custom);
- setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
+ if (Subtarget->hasFPARMv8()) {
+ setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::f16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
+ }
// Indexed loads and stores are supported.
for (unsigned im = (unsigned)ISD::PRE_INC;
@@ -24620,10 +24619,6 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
EVT VT = N->getValueType(0);
EVT SrcVT = Op.getValueType();
- if (!Subtarget->hasFPARMv8() &&
- (SrcVT == MVT::f16 || SrcVT == MVT::i16 || SrcVT == MVT::bf16))
- return;
-
if (VT == MVT::v2i16 && SrcVT == MVT::i32) {
CustomNonLegalBITCASTResults(N, Results, DAG, MVT::v2i32, MVT::v4i16);
return;
>From f7df5b2a865d13baed671c1c7c8b7ad0d700102f Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Thu, 22 Feb 2024 15:39:42 +0000
Subject: [PATCH 8/8] Uncommenting f16_return within
strictfp_f16_abi_promote.ll
Change-Id: I11779e4a7e4494b45c1a166ebc46ef5887b38770
---
.../AArch64/strictfp_f16_abi_promote.ll | 115 +++++++++++++++---
1 file changed, 98 insertions(+), 17 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
index a34f7abcc22a3f..9fa5208cc8db68 100644
--- a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
+++ b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
@@ -131,26 +131,107 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
ret void
}
-; FIXME:
-; define half @f16_return(float %arg) #0 {
-; %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-; ret half %fptrunc
-; }
+ define half @f16_return(float %arg) #0 {
+; NOFP16-LABEL: f16_return:
+; NOFP16: // %bb.0:
+; NOFP16-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 16
+; NOFP16-NEXT: .cfi_offset w30, -16
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; NOFP16-NEXT: ret
+ %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret half %fptrunc
+ }
-; define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
-; %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-; ret <2 x half> %fptrunc
-; }
+ define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
+; NOFP16-LABEL: v2f16_return:
+; NOFP16: // %bb.0:
+; NOFP16-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; NOFP16-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 32
+; NOFP16-NEXT: .cfi_offset w19, -8
+; NOFP16-NEXT: .cfi_offset w20, -16
+; NOFP16-NEXT: .cfi_offset w30, -32
+; NOFP16-NEXT: mov w19, w0
+; NOFP16-NEXT: mov w0, w1
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w20, w0
+; NOFP16-NEXT: mov w0, w19
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w1, w20
+; NOFP16-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; NOFP16-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; NOFP16-NEXT: ret
+ %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret <2 x half> %fptrunc
+ }
-; define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
-; %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-; ret <3 x half> %fptrunc
-; }
+ define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
+; NOFP16-LABEL: v3f16_return:
+; NOFP16: // %bb.0:
+; NOFP16-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; NOFP16-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 32
+; NOFP16-NEXT: .cfi_offset w19, -8
+; NOFP16-NEXT: .cfi_offset w20, -16
+; NOFP16-NEXT: .cfi_offset w21, -24
+; NOFP16-NEXT: .cfi_offset w30, -32
+; NOFP16-NEXT: mov w20, w0
+; NOFP16-NEXT: mov w0, w2
+; NOFP16-NEXT: mov w19, w1
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w21, w0
+; NOFP16-NEXT: mov w0, w19
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w19, w0
+; NOFP16-NEXT: mov w0, w20
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w1, w19
+; NOFP16-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; NOFP16-NEXT: mov w2, w21
+; NOFP16-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; NOFP16-NEXT: ret
+ %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret <3 x half> %fptrunc
+ }
-; define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
-; %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-; ret <4 x half> %fptrunc
-; }
+ define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
+; NOFP16-LABEL: v4f16_return:
+; NOFP16: // %bb.0:
+; NOFP16-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
+; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 48
+; NOFP16-NEXT: .cfi_offset w19, -8
+; NOFP16-NEXT: .cfi_offset w20, -16
+; NOFP16-NEXT: .cfi_offset w21, -24
+; NOFP16-NEXT: .cfi_offset w22, -32
+; NOFP16-NEXT: .cfi_offset w30, -48
+; NOFP16-NEXT: mov w21, w0
+; NOFP16-NEXT: mov w0, w3
+; NOFP16-NEXT: mov w19, w2
+; NOFP16-NEXT: mov w20, w1
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w22, w0
+; NOFP16-NEXT: mov w0, w19
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w19, w0
+; NOFP16-NEXT: mov w0, w20
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w20, w0
+; NOFP16-NEXT: mov w0, w21
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w1, w20
+; NOFP16-NEXT: mov w2, w19
+; NOFP16-NEXT: mov w3, w22
+; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
+; NOFP16-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
+; NOFP16-NEXT: ret
+ %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret <4 x half> %fptrunc
+ }
; FIXME:
; define void @outgoing_f16_arg(ptr %ptr) #0 {
More information about the llvm-commits
mailing list