[llvm] [AArch64][CodeGen] Fix crash when fptrunc returns fp16 with +nofp attr (PR #81724)

Wed Feb 21 08:38:01 PST 2024

https://github.com/nasherm updated https://github.com/llvm/llvm-project/pull/81724

>From 6c7a740bf0004d97d7c8bebd15c4a878db6bdcc7 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Wed, 14 Feb 2024 10:56:47 +0000
Subject: [PATCH 1/6] [AArch64][CodeGen] Fix crash when fptrunc returns fp16
 with +nofp attr

When performing lowering of the fptrunc opcode returning fp16 with
the +nofp flag enabled we could trigger a compiler crash.
This is because we had no custom lowering implemented. This patch
implements a custom lowering for the case in which we need to
promote an fp16 return type for fptrunc when the +nofp attr is enabled.

Change-Id: Ibea20a676d40fde3f25e1ade365620071f46ff2b
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  9 ++++++++
 .../AArch64/float16-promotion-with-nofp.ll    | 21 +++++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a3b7e3128ac1a4..c58600ae386730 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25008,6 +25008,15 @@ void AArch64TargetLowering::ReplaceNodeResults(
       Results.push_back(
           LowerToPredicatedOp(SDValue(N, 0), DAG, AArch64ISD::MULHU_PRED));
     return;
+  case ISD::FP_ROUND:{
+    if (N->getValueType(0) == MVT::f16 && !Subtarget->hasFullFP16()) {
+        // Promote fp16 result to legal type
+        SDLoc DL(N);
+        EVT NVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+        Results.push_back(DAG.getNode(ISD::FP16_TO_FP, DL, NVT, N->getOperand(0)));
+    }
+    return;
+  }
   case ISD::FP_TO_UINT:
   case ISD::FP_TO_SINT:
   case ISD::STRICT_FP_TO_SINT:
diff --git a/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll b/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
new file mode 100644
index 00000000000000..03426579131a1d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mcpu=cortex-r82 -O1 -o - %s | FileCheck %s
+
+; Source used:
+; __fp16 f2h(float a) { return a; }
+; Compiled with: clang --target=aarch64-arm-none-eabi -march=armv8-r+nofp
+
+define hidden noundef nofpclass(nan inf) half @f2h(float noundef nofpclass(nan inf) %a) local_unnamed_addr #0 {
+;CHECK:      f2h:                                    // @f2h
+;CHECK-NEXT: // %bb.0:                               // %entry
+;CHECK-NEXT:     str x30, [sp, #-16]!                // 8-byte Folded Spill
+;CHECK-NEXT:     bl  __gnu_h2f_ieee
+;CHECK-NEXT:     ldr x30, [sp], #16                  // 8-byte Folded Reload
+;CHECK-NEXT:     ret
+entry:
+  %0 = fptrunc float %a to half
+  ret half %0
+}
+
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "denormal-fp-math"="preserve-sign,preserve-sign" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+crc,+lse,+pauth,+ras,+rcpc,+sb,+ssbs,+v8r,-complxnum,-dotprod,-fmv,-fp-armv8,-fp16fml,-fullfp16,-jsconv,-neon,-rdm" }
+

>From d09478d325822074e7b3cd7050b88881f6c53b60 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Wed, 14 Feb 2024 11:11:29 +0000
Subject: [PATCH 2/6] clang-format fixes

Change-Id: Icf71578773b4c44fe8d79edd984661ec79fe1b09
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c58600ae386730..a82bd77cce4bf7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25008,12 +25008,13 @@ void AArch64TargetLowering::ReplaceNodeResults(
       Results.push_back(
           LowerToPredicatedOp(SDValue(N, 0), DAG, AArch64ISD::MULHU_PRED));
     return;
-  case ISD::FP_ROUND:{
+  case ISD::FP_ROUND: {
     if (N->getValueType(0) == MVT::f16 && !Subtarget->hasFullFP16()) {
-        // Promote fp16 result to legal type
-        SDLoc DL(N);
-        EVT NVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-        Results.push_back(DAG.getNode(ISD::FP16_TO_FP, DL, NVT, N->getOperand(0)));
+      // Promote fp16 result to legal type
+      SDLoc DL(N);
+      EVT NVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+      Results.push_back(
+          DAG.getNode(ISD::FP16_TO_FP, DL, NVT, N->getOperand(0)));
     }
     return;
   }

>From 1fa48a805a2d6d3662aa9a6a8950e9bac5614359 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Wed, 14 Feb 2024 16:56:53 +0000
Subject: [PATCH 3/6] Responding to review comments

Change-Id: Ic64506bb76bcc8f059b1de9ea6041fe8c0093b9c
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  5 ++--
 .../AArch64/float16-promotion-with-nofp.ll    | 26 ++++++++-----------
 2 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a82bd77cce4bf7..63ca0f6575f77c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1880,6 +1880,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT,
   }
 
   // Lower fixed length vector operations to scalable equivalents.
+  setOperationAction(ISD::ANY_EXTEND, MVT::f32, Legal);
   setOperationAction(ISD::ABS, VT, Custom);
   setOperationAction(ISD::ADD, VT, Custom);
   setOperationAction(ISD::AND, VT, Custom);
@@ -25010,11 +25011,9 @@ void AArch64TargetLowering::ReplaceNodeResults(
     return;
   case ISD::FP_ROUND: {
     if (N->getValueType(0) == MVT::f16 && !Subtarget->hasFullFP16()) {
-      // Promote fp16 result to legal type
       SDLoc DL(N);
-      EVT NVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
       Results.push_back(
-          DAG.getNode(ISD::FP16_TO_FP, DL, NVT, N->getOperand(0)));
+          DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i16, N->getOperand(0)));
     }
     return;
   }
diff --git a/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll b/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
index 03426579131a1d..714cbcc1fdb6db 100644
--- a/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
+++ b/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
@@ -1,21 +1,17 @@
-; RUN: llc -mcpu=cortex-r82 -O1 -o - %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -mattr=-fp-armv8 -o - %s | FileCheck %s
 
-; Source used:
-; __fp16 f2h(float a) { return a; }
-; Compiled with: clang --target=aarch64-arm-none-eabi -march=armv8-r+nofp
-
-define hidden noundef nofpclass(nan inf) half @f2h(float noundef nofpclass(nan inf) %a) local_unnamed_addr #0 {
-;CHECK:      f2h:                                    // @f2h
-;CHECK-NEXT: // %bb.0:                               // %entry
-;CHECK-NEXT:     str x30, [sp, #-16]!                // 8-byte Folded Spill
-;CHECK-NEXT:     bl  __gnu_h2f_ieee
-;CHECK-NEXT:     ldr x30, [sp], #16                  // 8-byte Folded Reload
-;CHECK-NEXT:     ret
+define half @f2h(float %a) {
+; CHECK-LABEL: f2h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
 entry:
   %0 = fptrunc float %a to half
   ret half %0
 }
 
-
-attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "denormal-fp-math"="preserve-sign,preserve-sign" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+crc,+lse,+pauth,+ras,+rcpc,+sb,+ssbs,+v8r,-complxnum,-dotprod,-fmv,-fp-armv8,-fp16fml,-fullfp16,-jsconv,-neon,-rdm" }
-

>From 28e123e77c9b71f7f5d8d6fc72dd680cdeb175c0 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Thu, 15 Feb 2024 13:46:26 +0000
Subject: [PATCH 4/6] Responding to review comments

Change-Id: Ib18e0ceeaab18d31d3fc43daab838ea95d62c2c1
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 17 ++++++++---------
 .../AArch64/float16-promotion-with-nofp.ll      |  3 +++
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 63ca0f6575f77c..259d2ae42df9dd 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -541,12 +541,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
   setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
   setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
-  setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
   setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
   setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
+  if (!Subtarget->hasFullFP16())
+    setOperationAction(ISD::FP_ROUND, MVT::f16, Expand);
+  else
+    setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
 
   setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
   setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
@@ -24581,6 +24584,10 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
   EVT VT = N->getValueType(0);
   EVT SrcVT = Op.getValueType();
 
+  // Default to the generic legalizer
+  if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16())
+    return;
+
   if (VT == MVT::v2i16 && SrcVT == MVT::i32) {
     CustomNonLegalBITCASTResults(N, Results, DAG, MVT::v2i32, MVT::v4i16);
     return;
@@ -25009,14 +25016,6 @@ void AArch64TargetLowering::ReplaceNodeResults(
       Results.push_back(
           LowerToPredicatedOp(SDValue(N, 0), DAG, AArch64ISD::MULHU_PRED));
     return;
-  case ISD::FP_ROUND: {
-    if (N->getValueType(0) == MVT::f16 && !Subtarget->hasFullFP16()) {
-      SDLoc DL(N);
-      Results.push_back(
-          DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i16, N->getOperand(0)));
-    }
-    return;
-  }
   case ISD::FP_TO_UINT:
   case ISD::FP_TO_SINT:
   case ISD::STRICT_FP_TO_SINT:
diff --git a/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll b/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
index 714cbcc1fdb6db..e3c72883976fca 100644
--- a/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
+++ b/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
@@ -8,6 +8,9 @@ define half @f2h(float %a) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    and w0, w0, #0xffff
+; CHECK-NEXT:    bl __gnu_h2f_ieee
+; CHECK-NEXT:    bl __gnu_f2h_ieee
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:

>From 0059e954cfbf1478d7c4e7cd4c7f7a79d30f6886 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Mon, 19 Feb 2024 11:37:05 +0000
Subject: [PATCH 5/6] Using hasFPARMv8 over hasFullFP16

Change-Id: I92af1dc9413486d2c20d90aebf0e377b077ad428
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 259d2ae42df9dd..d8920fca0a7921 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -546,9 +546,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
-  if (!Subtarget->hasFullFP16())
-    setOperationAction(ISD::FP_ROUND, MVT::f16, Expand);
-  else
+
+  if (Subtarget->hasFPARMv8())
     setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
 
   setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
@@ -1883,7 +1882,6 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT,
   }
 
   // Lower fixed length vector operations to scalable equivalents.
-  setOperationAction(ISD::ANY_EXTEND, MVT::f32, Legal);
   setOperationAction(ISD::ABS, VT, Custom);
   setOperationAction(ISD::ADD, VT, Custom);
   setOperationAction(ISD::AND, VT, Custom);
@@ -24585,7 +24583,7 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
   EVT SrcVT = Op.getValueType();
 
   // Default to the generic legalizer
-  if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16())
+  if (SrcVT == MVT::f16 && !Subtarget->hasFPARMv8())
     return;
 
   if (VT == MVT::v2i16 && SrcVT == MVT::i32) {

>From eba9ee8e19aebd1b02bcdbe41f6ad10036b145f7 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Wed, 21 Feb 2024 16:37:41 +0000
Subject: [PATCH 6/6] Responding to review comments

Change-Id: Ic1b7f8774ae529680597b4e032c4a63416ac927a
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 11 +++++++----
 llvm/lib/Target/AArch64/AArch64ISelLowering.h   |  2 ++
 ...fp.ll => 16bit-float-promotion-with-nofp.ll} | 17 ++++++++++++++---
 3 files changed, 23 insertions(+), 7 deletions(-)
 rename llvm/test/CodeGen/AArch64/{float16-promotion-with-nofp.ll => 16bit-float-promotion-with-nofp.ll} (57%)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d8920fca0a7921..c3078b0774fa4d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -547,8 +547,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
 
-  if (Subtarget->hasFPARMv8())
-    setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
+  if (Subtarget->hasFPARMv8()) {
+    setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+    setOperationAction(ISD::BITCAST, MVT::f16, Custom);
+    setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
+  }
 
   setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
   setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
@@ -24582,8 +24585,8 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
   EVT VT = N->getValueType(0);
   EVT SrcVT = Op.getValueType();
 
-  // Default to the generic legalizer
-  if (SrcVT == MVT::f16 && !Subtarget->hasFPARMv8())
+  if (!Subtarget->hasFPARMv8() &&
+      ( SrcVT == MVT::f16 || SrcVT == MVT::i16 || SrcVT == MVT::bf16))
     return;
 
   if (VT == MVT::v2i16 && SrcVT == MVT::i32) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 436b21fd134632..bec13484450d78 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1308,6 +1308,8 @@ class AArch64TargetLowering : public TargetLowering {
   bool preferScalarizeSplat(SDNode *N) const override;
 
   unsigned getMinimumJumpTableEntries() const override;
+
+  bool softPromoteHalfType() const override { return true; }
 };
 
 namespace AArch64 {
diff --git a/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll b/llvm/test/CodeGen/AArch64/16bit-float-promotion-with-nofp.ll
similarity index 57%
rename from llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
rename to llvm/test/CodeGen/AArch64/16bit-float-promotion-with-nofp.ll
index e3c72883976fca..bfe9ab8424bb03 100644
--- a/llvm/test/CodeGen/AArch64/float16-promotion-with-nofp.ll
+++ b/llvm/test/CodeGen/AArch64/16bit-float-promotion-with-nofp.ll
@@ -8,9 +8,6 @@ define half @f2h(float %a) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl __gnu_f2h_ieee
-; CHECK-NEXT:    and w0, w0, #0xffff
-; CHECK-NEXT:    bl __gnu_h2f_ieee
-; CHECK-NEXT:    bl __gnu_f2h_ieee
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:
@@ -18,3 +15,17 @@ entry:
   ret half %0
 }
 
+define bfloat @f2bfloat(float %a) {
+; CHECK-LABEL: f2bfloat:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl __truncsfbf2
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %0 = fptrunc float %a to bfloat
+  ret bfloat %0
+}
+