[llvm] [SystemZ] Mark fminimumnum/fmaximumnum as legal (PR #184595)

Wed Mar 4 03:51:01 PST 2026

https://github.com/nikic created https://github.com/llvm/llvm-project/pull/184595

Per my reading of the s390x Principles of Operation (PoO), in M=4 mode these instructions return the other operand when one operand is an sNaN (rather than returning a qNaN, as for M=0), which matches the semantics of fminimumnum/fmaximumnum.

I'd appreciate it if an s390x expert could confirm that.

(I think this also means that the current lowering for strict_fminnum/strict_fmaxnum is incorrect and should use M=0 instead. Unlike the non-strict variants, these are required to have IEEE sNaN handling.)

>From 4ec0f3eb6bbb6a96841609e8931c4e914f1bb768 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Wed, 4 Mar 2026 12:47:12 +0100
Subject: [PATCH] [SystemZ] Mark fminimumnum/fmaximumnum as legal

Per my reading of the s390x Principles of Operation (PoO), in M=4
mode these instructions return the other operand when one operand is
an sNaN (rather than returning a qNaN, as for M=0), which matches the
semantics of fminimumnum/fmaximumnum.
I'd appreciate it if an s390x expert could confirm that.
---
 .../Target/SystemZ/SystemZISelLowering.cpp    |  32 +---
 llvm/lib/Target/SystemZ/SystemZInstrVector.td |   2 +
 .../SystemZ/fminimumnum-fmaximumnum.ll        | 164 ++++++++++++++++++
 3 files changed, 174 insertions(+), 24 deletions(-)
 create mode 100644 llvm/test/CodeGen/SystemZ/fminimumnum-fmaximumnum.ll

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index eacaaddc5e4d4..2a9cb903f3921 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -694,30 +694,14 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
     setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Legal);
 
-    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
-    setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
-    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
-    setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
-
-    setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
-    setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
-    setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
-    setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
-
-    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
-    setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
-    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
-    setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
-
-    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
-    setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
-    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
-    setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
-
-    setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
-    setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
-    setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
-    setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
+    for (MVT Type : {MVT::f64, MVT::v2f64, MVT::f32, MVT::v4f32, MVT::f128}) {
+      setOperationAction(ISD::FMAXNUM, Type, Legal);
+      setOperationAction(ISD::FMAXIMUM, Type, Legal);
+      setOperationAction(ISD::FMAXIMUMNUM, Type, Legal);
+      setOperationAction(ISD::FMINNUM, Type, Legal);
+      setOperationAction(ISD::FMINIMUM, Type, Legal);
+      setOperationAction(ISD::FMINIMUMNUM, Type, Legal);
+    }
 
     // Handle constrained floating-point operations.
     setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 0bbca28ca7be0..fe26bdc72799b 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -1599,6 +1599,7 @@ let Predicates = [FeatureVector] in {
   // Maximum.
   multiclass VectorMax<Instruction insn, TypedReg tr> {
     def : FPMinMax<insn, any_fmaxnum, tr, 4>;
+    def : FPMinMax<insn, fmaximumnum, tr, 4>;
     def : FPMinMax<insn, any_fmaximum, tr, 1>;
   }
   let Predicates = [FeatureVectorEnhancements1] in {
@@ -1625,6 +1626,7 @@ let Predicates = [FeatureVector] in {
   // Minimum.
   multiclass VectorMin<Instruction insn, TypedReg tr> {
     def : FPMinMax<insn, any_fminnum, tr, 4>;
+    def : FPMinMax<insn, fminimumnum, tr, 4>;
     def : FPMinMax<insn, any_fminimum, tr, 1>;
   }
   let Predicates = [FeatureVectorEnhancements1] in {
diff --git a/llvm/test/CodeGen/SystemZ/fminimumnum-fmaximumnum.ll b/llvm/test/CodeGen/SystemZ/fminimumnum-fmaximumnum.ll
new file mode 100644
index 0000000000000..b447e9119086f
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fminimumnum-fmaximumnum.ll
@@ -0,0 +1,164 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=s390x-unknown-linux-gnu -mcpu=z14 < %s | FileCheck %s
+
+define half @fminimumnum_f16(half %a, half %b) nounwind {
+; CHECK-LABEL: fminimumnum_f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    aghi %r15, -176
+; CHECK-NEXT:    std %f8, 168(%r15) # 8-byte Spill
+; CHECK-NEXT:    std %f9, 160(%r15) # 8-byte Spill
+; CHECK-NEXT:    ldr %f8, %f0
+; CHECK-NEXT:    ldr %f0, %f2
+; CHECK-NEXT:    brasl %r14, __extendhfsf2@PLT
+; CHECK-NEXT:    ldr %f9, %f0
+; CHECK-NEXT:    ldr %f0, %f8
+; CHECK-NEXT:    brasl %r14, __extendhfsf2@PLT
+; CHECK-NEXT:    wfminsb %f0, %f0, %f9, 4
+; CHECK-NEXT:    brasl %r14, __truncsfhf2@PLT
+; CHECK-NEXT:    ld %f8, 168(%r15) # 8-byte Reload
+; CHECK-NEXT:    ld %f9, 160(%r15) # 8-byte Reload
+; CHECK-NEXT:    lmg %r14, %r15, 288(%r15)
+; CHECK-NEXT:    br %r14
+  %res = call half @llvm.minimumnum(half %a, half %b)
+  ret half %res
+}
+
+define float @fminimumnum_f32(float %a, float %b) {
+; CHECK-LABEL: fminimumnum_f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    wfminsb %f0, %f0, %f2, 4
+; CHECK-NEXT:    br %r14
+  %res = call float @llvm.minimumnum(float %a, float %b)
+  ret float %res
+}
+
+define double @fminimumnum_f64(double %a, double %b) {
+; CHECK-LABEL: fminimumnum_f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    wfmindb %f0, %f0, %f2, 4
+; CHECK-NEXT:    br %r14
+  %res = call double @llvm.minimumnum(double %a, double %b)
+  ret double %res
+}
+
+define fp128 @fminimumnum_f128(fp128 %a, fp128 %b) {
+; CHECK-LABEL: fminimumnum_f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl %v0, 0(%r4), 3
+; CHECK-NEXT:    vl %v1, 0(%r3), 3
+; CHECK-NEXT:    wfminxb %v0, %v1, %v0, 4
+; CHECK-NEXT:    vst %v0, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+  %res = call fp128 @llvm.minimumnum(fp128 %a, fp128 %b)
+  ret fp128 %res
+}
+
+define <4 x float> @fminimumnum_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: fminimumnum_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfminsb %v24, %v24, %v26, 4
+; CHECK-NEXT:    br %r14
+  %res = call <4 x float> @llvm.minimumnum(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %res
+}
+
+define <2 x double> @fminimumnum_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: fminimumnum_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmindb %v24, %v24, %v26, 4
+; CHECK-NEXT:    br %r14
+  %res = call <2 x double> @llvm.minimumnum(<2 x double> %a, <2 x double> %b)
+  ret <2 x double> %res
+}
+
+define <4 x double> @fminimumnum_v4f64(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: fminimumnum_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmindb %v24, %v24, %v28, 4
+; CHECK-NEXT:    vfmindb %v26, %v26, %v30, 4
+; CHECK-NEXT:    br %r14
+  %res = call <4 x double> @llvm.minimumnum(<4 x double> %a, <4 x double> %b)
+  ret <4 x double> %res
+}
+
+define half @fmaximumnum_f16(half %a, half %b) nounwind {
+; CHECK-LABEL: fmaximumnum_f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT:    aghi %r15, -176
+; CHECK-NEXT:    std %f8, 168(%r15) # 8-byte Spill
+; CHECK-NEXT:    std %f9, 160(%r15) # 8-byte Spill
+; CHECK-NEXT:    ldr %f8, %f0
+; CHECK-NEXT:    ldr %f0, %f2
+; CHECK-NEXT:    brasl %r14, __extendhfsf2@PLT
+; CHECK-NEXT:    ldr %f9, %f0
+; CHECK-NEXT:    ldr %f0, %f8
+; CHECK-NEXT:    brasl %r14, __extendhfsf2@PLT
+; CHECK-NEXT:    wfmaxsb %f0, %f0, %f9, 4
+; CHECK-NEXT:    brasl %r14, __truncsfhf2@PLT
+; CHECK-NEXT:    ld %f8, 168(%r15) # 8-byte Reload
+; CHECK-NEXT:    ld %f9, 160(%r15) # 8-byte Reload
+; CHECK-NEXT:    lmg %r14, %r15, 288(%r15)
+; CHECK-NEXT:    br %r14
+  %res = call half @llvm.maximumnum(half %a, half %b)
+  ret half %res
+}
+
+define float @fmaximumnum_f32(float %a, float %b) {
+; CHECK-LABEL: fmaximumnum_f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    wfmaxsb %f0, %f0, %f2, 4
+; CHECK-NEXT:    br %r14
+  %res = call float @llvm.maximumnum(float %a, float %b)
+  ret float %res
+}
+
+define double @fmaximumnum_f64(double %a, double %b) {
+; CHECK-LABEL: fmaximumnum_f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    wfmaxdb %f0, %f0, %f2, 4
+; CHECK-NEXT:    br %r14
+  %res = call double @llvm.maximumnum(double %a, double %b)
+  ret double %res
+}
+
+define fp128 @fmaximumnum_f128(fp128 %a, fp128 %b) {
+; CHECK-LABEL: fmaximumnum_f128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl %v0, 0(%r4), 3
+; CHECK-NEXT:    vl %v1, 0(%r3), 3
+; CHECK-NEXT:    wfmaxxb %v0, %v1, %v0, 4
+; CHECK-NEXT:    vst %v0, 0(%r2), 3
+; CHECK-NEXT:    br %r14
+  %res = call fp128 @llvm.maximumnum(fp128 %a, fp128 %b)
+  ret fp128 %res
+}
+
+define <4 x float> @fmaximumnum_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: fmaximumnum_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmaxsb %v24, %v24, %v26, 4
+; CHECK-NEXT:    br %r14
+  %res = call <4 x float> @llvm.maximumnum(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %res
+}
+
+define <2 x double> @fmaximumnum_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: fmaximumnum_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmaxdb %v24, %v24, %v26, 4
+; CHECK-NEXT:    br %r14
+  %res = call <2 x double> @llvm.maximumnum(<2 x double> %a, <2 x double> %b)
+  ret <2 x double> %res
+}
+
+define <4 x double> @fmaximumnum_v4f64(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: fmaximumnum_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vfmaxdb %v24, %v24, %v28, 4
+; CHECK-NEXT:    vfmaxdb %v26, %v26, %v30, 4
+; CHECK-NEXT:    br %r14
+  %res = call <4 x double> @llvm.maximumnum(<4 x double> %a, <4 x double> %b)
+  ret <4 x double> %res
+}