[llvm] SelectionDAG: Expand fminimumnum/fmaximumnum for promote (PR #135614)

Mon Apr 14 03:44:13 PDT 2025

https://github.com/wzssyqa created https://github.com/llvm/llvm-project/pull/135614

In SelectionDAG/TargetLowering.cpp, if the backend marks ISD::FMAXNUM_IEEE and ISD::FMINNUM_IEEE as Promote for VT (and the operation is legal on the promoted type), we can expand fmaximumnum/fminimumnum to:
   FP_EXTEND
   FP_EXTEND
   FMAXNUM_IEEE/FMINNUM_IEEE
   FP_ROUND

This patch also sets FMAXNUM_IEEE and FMINNUM_IEEE to Promote for f16 on AArch64.

>From f25822c37f842a33b87c4e73721828f7ed0258e5 Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Mon, 14 Apr 2025 18:37:31 +0800
Subject: [PATCH] SelectionDAG: Expand fminimumnum/fmaximumnum for promote

In SelectionDAG/TargetLowering.cpp, if the backend marks FMAXNUM_IEEE
and FMINNUM_IEEE as Promote for VT, fmaximumnum/fminimumnum expand to:
   FP_EXTEND
   FP_EXTEND
   FMAXNUM_IEEE/FMINNUM_IEEE
   FP_ROUND

This patch also sets FMAXNUM_IEEE and FMINNUM_IEEE to Promote for f16
on AArch64.
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 11 ++++++++
 .../Target/AArch64/AArch64ISelLowering.cpp    |  2 ++
 .../CodeGen/AArch64/fminmax-f16-promote.ll    | 28 +++++++++++++++++++
 3 files changed, 41 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/fminmax-f16-promote.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 89f806d8b1c30..9050b5a57541b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8645,6 +8645,17 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
     return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
   }
 
+  if (getOperationAction(NewOp, VT) == Promote) {
+    MVT PromoteVT = getTypeToPromoteTo(NewOp, VT.getSimpleVT());
+    if (isOperationLegal(NewOp, PromoteVT)) {
+      LHS = DAG.getNode(ISD::FP_EXTEND, DL, PromoteVT, LHS);
+      RHS = DAG.getNode(ISD::FP_EXTEND, DL, PromoteVT, RHS);
+      SDValue Result = DAG.getNode(NewOp, DL, PromoteVT, LHS, RHS, Flags);
+      return DAG.getNode(ISD::FP_ROUND, DL, VT, Result,
+                         DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+    }
+  }
+
   // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has
   // same behaviors for all of other cases: +0.0 vs -0.0 included.
   if (Flags.hasNoNaNs() ||
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 830ec6886e6bc..c6bbfa4c12698 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -885,6 +885,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
   // AArch64 has implementations of a lot of rounding-like FP operations.
   // clang-format off
+  for (auto Op : {ISD::FMINNUM_IEEE,    ISD::FMAXNUM_IEEE})
+    setOperationAction(Op, MVT::f16, Promote);
   for (auto Op :
        {ISD::FFLOOR,          ISD::FNEARBYINT,      ISD::FCEIL,
         ISD::FRINT,           ISD::FTRUNC,          ISD::FROUND,
diff --git a/llvm/test/CodeGen/AArch64/fminmax-f16-promote.ll b/llvm/test/CodeGen/AArch64/fminmax-f16-promote.ll
new file mode 100644
index 0000000000000..abd0a8e6591ec
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fminmax-f16-promote.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s
+
+define half @min(half noundef %a, half noundef %b) {
+; CHECK-LABEL: min:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt s1, h1
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fminnm s0, s0, s1
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call half @llvm.minimumnum.f16(half %a, half %b)
+  ret half %0
+}
+
+define half @max(half noundef %a, half noundef %b) {
+; CHECK-LABEL: max:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvt s1, h1
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ret
+entry:
+  %0 = tail call half @llvm.maximumnum.f16(half %a, half %b)
+  ret half %0
+}