[llvm] PowerPC/VSX: Select FMINNUM and FMAXNUM (PR #135739)

Mon Apr 14 20:12:09 PDT 2025

https://github.com/wzssyqa created https://github.com/llvm/llvm-project/pull/135739

In LangRef, we claim that FMINNUM and FMAXNUM should follow the minNum and maxNum operators in IEEE754-2008.

PowerPC/VSX does have such instructions: XSMINDP and XSMAXDP.

Note: FMAXNUM_IEEE and FMINNUM_IEEE will be removed in the future.

>From dbb39045251797baa9f5d7119948987a9bc39477 Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Mon, 14 Oct 2024 21:14:27 +0800
Subject: [PATCH 1/3] PowerPC/VSX: Select FMINNUM_IEEE and FMAXNUM_IEEE

PowerPC with VSX has vector instructions:
   XVMAXSP/XVMINSP/XVMAXDP/XVMINDP
which follow the semantics of minNum/maxNum of IEEE754-2008;

and scalar instructions
   XSMINDP/XSMAXDP
which also follow the semantics of minNum/maxNum of IEEE754-2008.

Let's use them to define FMAXNUM_IEEE and FMINNUM_IEEE.

Currently, some patterns of the form
   Pat<(f64 (fminnum_ieee (fcanonicalize ..
are defined. They are not correct. Let's remove them.
In a future patch, we will define fcanonicalize for PowerPC/VSX,
then `fminimumnum/fmaximumnum` will be usable.
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |   6 +
 llvm/lib/Target/PowerPC/PPCInstrVSX.td        |  33 ++---
 .../test/CodeGen/PowerPC/fminimum-fmaximum.ll | 133 +++++-------------
 llvm/test/CodeGen/PowerPC/scalar-min-max.ll   |  20 ++-
 4 files changed, 61 insertions(+), 131 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1f75425752a78..ed16d93591f35 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -777,6 +777,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
+    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
   }
 
   if (Subtarget.hasAltivec()) {
@@ -811,6 +815,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       if (Subtarget.hasVSX()) {
         setOperationAction(ISD::FMAXNUM, VT, Legal);
         setOperationAction(ISD::FMINNUM, VT, Legal);
+        setOperationAction(ISD::FMAXNUM_IEEE, VT, Legal);
+        setOperationAction(ISD::FMINNUM_IEEE, VT, Legal);
       }
 
       // Vector instructions introduced in P8
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 19448210f5db1..0cb9daac3703e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2720,6 +2720,15 @@ def : Pat<(v2f64 (any_fmaxnum v2f64:$src1, v2f64:$src2)),
 def : Pat<(v2f64 (any_fminnum v2f64:$src1, v2f64:$src2)),
           (v2f64 (XVMINDP $src1, $src2))>;
 
+def : Pat<(v4f32 (fmaxnum_ieee v4f32:$src1, v4f32:$src2)),
+          (v4f32 (XVMAXSP $src1, $src2))>;
+def : Pat<(v4f32 (fminnum_ieee v4f32:$src1, v4f32:$src2)),
+          (v4f32 (XVMINSP $src1, $src2))>;
+def : Pat<(v2f64 (fmaxnum_ieee v2f64:$src1, v2f64:$src2)),
+          (v2f64 (XVMAXDP $src1, $src2))>;
+def : Pat<(v2f64 (fminnum_ieee v2f64:$src1, v2f64:$src2)),
+          (v2f64 (XVMINDP $src1, $src2))>;
+
 // f32 abs
 def : Pat<(f32 (fabs f32:$S)),
           (f32 (COPY_TO_REGCLASS (XSABSDP
@@ -2733,39 +2742,23 @@ def : Pat<(f32 (fneg (fabs f32:$S))),
 // f32 Min.
 def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
           (f32 FpMinMax.F32Min)>;
-def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
-          (f32 FpMinMax.F32Min)>;
-def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))),
-          (f32 FpMinMax.F32Min)>;
-def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
+def : Pat<(f32 (fminnum f32:$A, f32:$B)),
           (f32 FpMinMax.F32Min)>;
 // F32 Max.
 def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
           (f32 FpMinMax.F32Max)>;
-def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
-          (f32 FpMinMax.F32Max)>;
-def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
-          (f32 FpMinMax.F32Max)>;
-def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
+def : Pat<(f32 (fmaxnum f32:$A, f32:$B)),
           (f32 FpMinMax.F32Max)>;
 
 // f64 Min.
 def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
           (f64 (XSMINDP $A, $B))>;
-def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
-          (f64 (XSMINDP $A, $B))>;
-def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
-          (f64 (XSMINDP $A, $B))>;
-def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
+def : Pat<(f64 (fminnum f64:$A, f64:$B)),
           (f64 (XSMINDP $A, $B))>;
 // f64 Max.
 def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
           (f64 (XSMAXDP $A, $B))>;
-def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
-          (f64 (XSMAXDP $A, $B))>;
-def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))),
-          (f64 (XSMAXDP $A, $B))>;
-def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
+def : Pat<(f64 (fmaxnum f64:$A, f64:$B)),
           (f64 (XSMAXDP $A, $B))>;
 
 def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, ForceXForm:$dst),
diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
index a99c25a4e4479..39cf136e10d77 100644
--- a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
@@ -301,22 +301,13 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) {
 ; VSX-NEXT:    xvcmpeqsp 1, 35, 35
 ; VSX-NEXT:    xvcmpeqsp 2, 34, 34
 ; VSX-NEXT:    addis 3, 2, .LCPI4_0 at toc@ha
-; VSX-NEXT:    xxleqv 36, 36, 36
-; VSX-NEXT:    xvminsp 0, 34, 35
-; VSX-NEXT:    vslw 4, 4, 4
 ; VSX-NEXT:    addi 3, 3, .LCPI4_0 at toc@l
 ; VSX-NEXT:    xxlnor 1, 1, 1
 ; VSX-NEXT:    xxlnor 2, 2, 2
-; VSX-NEXT:    vcmpequw 5, 2, 4
+; VSX-NEXT:    xvminsp 0, 34, 35
 ; VSX-NEXT:    xxlor 1, 2, 1
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxsel 0, 0, 2, 1
-; VSX-NEXT:    xxlxor 2, 2, 2
-; VSX-NEXT:    xvcmpeqsp 2, 0, 2
-; VSX-NEXT:    xxsel 1, 0, 34, 37
-; VSX-NEXT:    vcmpequw 2, 3, 4
-; VSX-NEXT:    xxsel 1, 1, 35, 34
-; VSX-NEXT:    xxsel 34, 0, 1, 2
+; VSX-NEXT:    xxsel 34, 0, 2, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v4f32_minimum:
@@ -324,21 +315,12 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) {
 ; AIX-NEXT:    xvcmpeqsp 1, 35, 35
 ; AIX-NEXT:    xvcmpeqsp 2, 34, 34
 ; AIX-NEXT:    ld 3, L..C4(2) # %const.0
-; AIX-NEXT:    xxleqv 36, 36, 36
 ; AIX-NEXT:    xvminsp 0, 34, 35
-; AIX-NEXT:    vslw 4, 4, 4
 ; AIX-NEXT:    xxlnor 1, 1, 1
 ; AIX-NEXT:    xxlnor 2, 2, 2
-; AIX-NEXT:    vcmpequw 5, 2, 4
 ; AIX-NEXT:    xxlor 1, 2, 1
 ; AIX-NEXT:    lxvw4x 2, 0, 3
-; AIX-NEXT:    xxsel 0, 0, 2, 1
-; AIX-NEXT:    xxlxor 2, 2, 2
-; AIX-NEXT:    xvcmpeqsp 2, 0, 2
-; AIX-NEXT:    xxsel 1, 0, 34, 37
-; AIX-NEXT:    vcmpequw 2, 3, 4
-; AIX-NEXT:    xxsel 1, 1, 35, 34
-; AIX-NEXT:    xxsel 34, 0, 1, 2
+; AIX-NEXT:    xxsel 34, 0, 2, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b)
@@ -377,16 +359,9 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
 ; VSX-NEXT:    xxlnor 1, 1, 1
 ; VSX-NEXT:    xxlnor 2, 2, 2
 ; VSX-NEXT:    xvmaxsp 0, 34, 35
-; VSX-NEXT:    xxlxor 36, 36, 36
-; VSX-NEXT:    vcmpequw 5, 2, 4
 ; VSX-NEXT:    xxlor 1, 2, 1
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxsel 0, 0, 2, 1
-; VSX-NEXT:    xvcmpeqsp 2, 0, 36
-; VSX-NEXT:    xxsel 1, 0, 34, 37
-; VSX-NEXT:    vcmpequw 2, 3, 4
-; VSX-NEXT:    xxsel 1, 1, 35, 34
-; VSX-NEXT:    xxsel 34, 0, 1, 2
+; VSX-NEXT:    xxsel 34, 0, 2, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v4f32_maximum:
@@ -395,18 +370,11 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
 ; AIX-NEXT:    xvcmpeqsp 2, 34, 34
 ; AIX-NEXT:    ld 3, L..C5(2) # %const.0
 ; AIX-NEXT:    xvmaxsp 0, 34, 35
-; AIX-NEXT:    xxlxor 36, 36, 36
 ; AIX-NEXT:    xxlnor 1, 1, 1
 ; AIX-NEXT:    xxlnor 2, 2, 2
-; AIX-NEXT:    vcmpequw 5, 2, 4
 ; AIX-NEXT:    xxlor 1, 2, 1
 ; AIX-NEXT:    lxvw4x 2, 0, 3
-; AIX-NEXT:    xxsel 0, 0, 2, 1
-; AIX-NEXT:    xvcmpeqsp 2, 0, 36
-; AIX-NEXT:    xxsel 1, 0, 34, 37
-; AIX-NEXT:    vcmpequw 2, 3, 4
-; AIX-NEXT:    xxsel 1, 1, 35, 34
-; AIX-NEXT:    xxsel 34, 0, 1, 2
+; AIX-NEXT:    xxsel 34, 0, 2, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b)
@@ -493,47 +461,28 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) {
 ; VSX-LABEL: v2f64_minimum:
 ; VSX:       # %bb.0: # %entry
 ; VSX-NEXT:    addis 3, 2, .LCPI6_0 at toc@ha
-; VSX-NEXT:    xvcmpeqdp 36, 35, 35
-; VSX-NEXT:    xvcmpeqdp 37, 34, 34
-; VSX-NEXT:    addi 3, 3, .LCPI6_0 at toc@l
-; VSX-NEXT:    xxlnor 36, 36, 36
-; VSX-NEXT:    xxlnor 37, 37, 37
 ; VSX-NEXT:    xvmindp 0, 34, 35
+; VSX-NEXT:    xvcmpeqdp 35, 35, 35
+; VSX-NEXT:    addi 3, 3, .LCPI6_0 at toc@l
+; VSX-NEXT:    xvcmpeqdp 34, 34, 34
+; VSX-NEXT:    xxlnor 35, 35, 35
+; VSX-NEXT:    xxlnor 34, 34, 34
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    addis 3, 2, .LCPI6_1 at toc@ha
-; VSX-NEXT:    xxlor 1, 37, 36
-; VSX-NEXT:    addi 3, 3, .LCPI6_1 at toc@l
-; VSX-NEXT:    lxvd2x 36, 0, 3
-; VSX-NEXT:    vcmpequd 5, 2, 4
-; VSX-NEXT:    xxsel 0, 0, 2, 1
-; VSX-NEXT:    xxlxor 2, 2, 2
-; VSX-NEXT:    xxsel 1, 0, 34, 37
-; VSX-NEXT:    vcmpequd 2, 3, 4
-; VSX-NEXT:    xxsel 1, 1, 35, 34
-; VSX-NEXT:    xvcmpeqdp 34, 0, 2
-; VSX-NEXT:    xxsel 34, 0, 1, 34
+; VSX-NEXT:    xxlor 1, 34, 35
+; VSX-NEXT:    xxsel 34, 0, 2, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v2f64_minimum:
 ; AIX:       # %bb.0: # %entry
 ; AIX-NEXT:    ld 3, L..C6(2) # %const.0
-; AIX-NEXT:    xvcmpeqdp 36, 35, 35
-; AIX-NEXT:    xvcmpeqdp 37, 34, 34
-; AIX-NEXT:    lxvd2x 2, 0, 3
-; AIX-NEXT:    ld 3, L..C7(2) # %const.1
-; AIX-NEXT:    xxlnor 36, 36, 36
-; AIX-NEXT:    xxlnor 37, 37, 37
 ; AIX-NEXT:    xvmindp 0, 34, 35
-; AIX-NEXT:    xxlor 1, 37, 36
-; AIX-NEXT:    lxvd2x 36, 0, 3
-; AIX-NEXT:    vcmpequd 5, 2, 4
-; AIX-NEXT:    xxsel 0, 0, 2, 1
-; AIX-NEXT:    xxlxor 2, 2, 2
-; AIX-NEXT:    xxsel 1, 0, 34, 37
-; AIX-NEXT:    vcmpequd 2, 3, 4
-; AIX-NEXT:    xxsel 1, 1, 35, 34
-; AIX-NEXT:    xvcmpeqdp 34, 0, 2
-; AIX-NEXT:    xxsel 34, 0, 1, 34
+; AIX-NEXT:    xvcmpeqdp 35, 35, 35
+; AIX-NEXT:    lxvd2x 2, 0, 3
+; AIX-NEXT:    xvcmpeqdp 34, 34, 34
+; AIX-NEXT:    xxlnor 35, 35, 35
+; AIX-NEXT:    xxlnor 34, 34, 34
+; AIX-NEXT:    xxlor 1, 34, 35
+; AIX-NEXT:    xxsel 34, 0, 2, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b)
@@ -618,42 +567,28 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) {
 ; VSX-LABEL: v2f64_maximum:
 ; VSX:       # %bb.0: # %entry
 ; VSX-NEXT:    addis 3, 2, .LCPI7_0 at toc@ha
-; VSX-NEXT:    xvcmpeqdp 36, 35, 35
-; VSX-NEXT:    xvcmpeqdp 37, 34, 34
-; VSX-NEXT:    addi 3, 3, .LCPI7_0 at toc@l
-; VSX-NEXT:    xxlnor 36, 36, 36
-; VSX-NEXT:    xxlnor 37, 37, 37
 ; VSX-NEXT:    xvmaxdp 0, 34, 35
+; VSX-NEXT:    xvcmpeqdp 35, 35, 35
+; VSX-NEXT:    addi 3, 3, .LCPI7_0 at toc@l
+; VSX-NEXT:    xvcmpeqdp 34, 34, 34
+; VSX-NEXT:    xxlnor 35, 35, 35
+; VSX-NEXT:    xxlnor 34, 34, 34
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxlor 1, 37, 36
-; VSX-NEXT:    xxlxor 36, 36, 36
-; VSX-NEXT:    vcmpequd 5, 2, 4
-; VSX-NEXT:    xxsel 0, 0, 2, 1
-; VSX-NEXT:    xxsel 1, 0, 34, 37
-; VSX-NEXT:    vcmpequd 2, 3, 4
-; VSX-NEXT:    xxsel 1, 1, 35, 34
-; VSX-NEXT:    xvcmpeqdp 34, 0, 36
-; VSX-NEXT:    xxsel 34, 0, 1, 34
+; VSX-NEXT:    xxlor 1, 34, 35
+; VSX-NEXT:    xxsel 34, 0, 2, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v2f64_maximum:
 ; AIX:       # %bb.0: # %entry
-; AIX-NEXT:    ld 3, L..C8(2) # %const.0
-; AIX-NEXT:    xvcmpeqdp 36, 35, 35
-; AIX-NEXT:    xvcmpeqdp 37, 34, 34
-; AIX-NEXT:    lxvd2x 2, 0, 3
-; AIX-NEXT:    xxlnor 36, 36, 36
-; AIX-NEXT:    xxlnor 37, 37, 37
+; AIX-NEXT:    ld 3, L..C7(2) # %const.0
 ; AIX-NEXT:    xvmaxdp 0, 34, 35
-; AIX-NEXT:    xxlor 1, 37, 36
-; AIX-NEXT:    xxlxor 36, 36, 36
-; AIX-NEXT:    vcmpequd 5, 2, 4
-; AIX-NEXT:    xxsel 0, 0, 2, 1
-; AIX-NEXT:    xxsel 1, 0, 34, 37
-; AIX-NEXT:    vcmpequd 2, 3, 4
-; AIX-NEXT:    xxsel 1, 1, 35, 34
-; AIX-NEXT:    xvcmpeqdp 34, 0, 36
-; AIX-NEXT:    xxsel 34, 0, 1, 34
+; AIX-NEXT:    xvcmpeqdp 35, 35, 35
+; AIX-NEXT:    lxvd2x 2, 0, 3
+; AIX-NEXT:    xvcmpeqdp 34, 34, 34
+; AIX-NEXT:    xxlnor 35, 35, 35
+; AIX-NEXT:    xxlnor 34, 34, 34
+; AIX-NEXT:    xxlor 1, 34, 35
+; AIX-NEXT:    xxsel 34, 0, 2, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b)
diff --git a/llvm/test/CodeGen/PowerPC/scalar-min-max.ll b/llvm/test/CodeGen/PowerPC/scalar-min-max.ll
index 216d498e85411..f6ea0d9cc2328 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-min-max.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-min-max.ll
@@ -117,13 +117,12 @@ define dso_local float @testfmax_fast(float %a, float %b) local_unnamed_addr {
 ;
 ; NO-FAST-P9-LABEL: testfmax_fast:
 ; NO-FAST-P9:       # %bb.0: # %entry
-; NO-FAST-P9-NEXT:    xsmaxcdp f1, f1, f2
+; NO-FAST-P9-NEXT:    xsmaxdp f1, f1, f2
 ; NO-FAST-P9-NEXT:    blr
 ;
 ; NO-FAST-P8-LABEL: testfmax_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xssubsp f0, f2, f1
-; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
+; NO-FAST-P8-NEXT:    xsmaxdp f1, f1, f2
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf ogt float %a, %b
@@ -138,13 +137,12 @@ define dso_local double @testdmax_fast(double %a, double %b) local_unnamed_addr
 ;
 ; NO-FAST-P9-LABEL: testdmax_fast:
 ; NO-FAST-P9:       # %bb.0: # %entry
-; NO-FAST-P9-NEXT:    xsmaxcdp f1, f1, f2
+; NO-FAST-P9-NEXT:    xsmaxdp f1, f1, f2
 ; NO-FAST-P9-NEXT:    blr
 ;
 ; NO-FAST-P8-LABEL: testdmax_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xssubdp f0, f2, f1
-; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
+; NO-FAST-P8-NEXT:    xsmaxdp f1, f1, f2
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf ogt double %a, %b
@@ -159,13 +157,12 @@ define dso_local float @testfmin_fast(float %a, float %b) local_unnamed_addr {
 ;
 ; NO-FAST-P9-LABEL: testfmin_fast:
 ; NO-FAST-P9:       # %bb.0: # %entry
-; NO-FAST-P9-NEXT:    xsmincdp f1, f1, f2
+; NO-FAST-P9-NEXT:    xsmindp f1, f1, f2
 ; NO-FAST-P9-NEXT:    blr
 ;
 ; NO-FAST-P8-LABEL: testfmin_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xssubsp f0, f1, f2
-; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
+; NO-FAST-P8-NEXT:    xsmindp f1, f1, f2
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf olt float %a, %b
@@ -180,13 +177,12 @@ define dso_local double @testdmin_fast(double %a, double %b) local_unnamed_addr
 ;
 ; NO-FAST-P9-LABEL: testdmin_fast:
 ; NO-FAST-P9:       # %bb.0: # %entry
-; NO-FAST-P9-NEXT:    xsmincdp f1, f1, f2
+; NO-FAST-P9-NEXT:    xsmindp f1, f1, f2
 ; NO-FAST-P9-NEXT:    blr
 ;
 ; NO-FAST-P8-LABEL: testdmin_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xssubdp f0, f1, f2
-; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
+; NO-FAST-P8-NEXT:    xsmindp f1, f1, f2
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf olt double %a, %b

>From ce1748f97516dd97052e9f87f2735f1e3a2236f4 Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Tue, 15 Apr 2025 11:05:49 +0800
Subject: [PATCH 2/3] Revert "PowerPC/VSX: Select FMINNUM_IEEE and
 FMAXNUM_IEEE"

This reverts commit dbb39045251797baa9f5d7119948987a9bc39477.
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |   6 -
 llvm/lib/Target/PowerPC/PPCInstrVSX.td        |  33 +++--
 .../test/CodeGen/PowerPC/fminimum-fmaximum.ll | 133 +++++++++++++-----
 llvm/test/CodeGen/PowerPC/scalar-min-max.ll   |  20 +--
 4 files changed, 131 insertions(+), 61 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index ed16d93591f35..1f75425752a78 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -777,10 +777,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
-    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
-    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
-    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
-    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
   }
 
   if (Subtarget.hasAltivec()) {
@@ -815,8 +811,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       if (Subtarget.hasVSX()) {
         setOperationAction(ISD::FMAXNUM, VT, Legal);
         setOperationAction(ISD::FMINNUM, VT, Legal);
-        setOperationAction(ISD::FMAXNUM_IEEE, VT, Legal);
-        setOperationAction(ISD::FMINNUM_IEEE, VT, Legal);
       }
 
       // Vector instructions introduced in P8
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 0cb9daac3703e..19448210f5db1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2720,15 +2720,6 @@ def : Pat<(v2f64 (any_fmaxnum v2f64:$src1, v2f64:$src2)),
 def : Pat<(v2f64 (any_fminnum v2f64:$src1, v2f64:$src2)),
           (v2f64 (XVMINDP $src1, $src2))>;
 
-def : Pat<(v4f32 (fmaxnum_ieee v4f32:$src1, v4f32:$src2)),
-          (v4f32 (XVMAXSP $src1, $src2))>;
-def : Pat<(v4f32 (fminnum_ieee v4f32:$src1, v4f32:$src2)),
-          (v4f32 (XVMINSP $src1, $src2))>;
-def : Pat<(v2f64 (fmaxnum_ieee v2f64:$src1, v2f64:$src2)),
-          (v2f64 (XVMAXDP $src1, $src2))>;
-def : Pat<(v2f64 (fminnum_ieee v2f64:$src1, v2f64:$src2)),
-          (v2f64 (XVMINDP $src1, $src2))>;
-
 // f32 abs
 def : Pat<(f32 (fabs f32:$S)),
           (f32 (COPY_TO_REGCLASS (XSABSDP
@@ -2742,23 +2733,39 @@ def : Pat<(f32 (fneg (fabs f32:$S))),
 // f32 Min.
 def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
           (f32 FpMinMax.F32Min)>;
-def : Pat<(f32 (fminnum f32:$A, f32:$B)),
+def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
+          (f32 FpMinMax.F32Min)>;
+def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))),
+          (f32 FpMinMax.F32Min)>;
+def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
           (f32 FpMinMax.F32Min)>;
 // F32 Max.
 def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
           (f32 FpMinMax.F32Max)>;
-def : Pat<(f32 (fmaxnum f32:$A, f32:$B)),
+def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
+          (f32 FpMinMax.F32Max)>;
+def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
+          (f32 FpMinMax.F32Max)>;
+def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
           (f32 FpMinMax.F32Max)>;
 
 // f64 Min.
 def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
           (f64 (XSMINDP $A, $B))>;
-def : Pat<(f64 (fminnum f64:$A, f64:$B)),
+def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
+          (f64 (XSMINDP $A, $B))>;
+def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
+          (f64 (XSMINDP $A, $B))>;
+def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
           (f64 (XSMINDP $A, $B))>;
 // f64 Max.
 def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
           (f64 (XSMAXDP $A, $B))>;
-def : Pat<(f64 (fmaxnum f64:$A, f64:$B)),
+def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
+          (f64 (XSMAXDP $A, $B))>;
+def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))),
+          (f64 (XSMAXDP $A, $B))>;
+def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
           (f64 (XSMAXDP $A, $B))>;
 
 def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, ForceXForm:$dst),
diff --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
index 39cf136e10d77..a99c25a4e4479 100644
--- a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
@@ -301,13 +301,22 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) {
 ; VSX-NEXT:    xvcmpeqsp 1, 35, 35
 ; VSX-NEXT:    xvcmpeqsp 2, 34, 34
 ; VSX-NEXT:    addis 3, 2, .LCPI4_0 at toc@ha
+; VSX-NEXT:    xxleqv 36, 36, 36
+; VSX-NEXT:    xvminsp 0, 34, 35
+; VSX-NEXT:    vslw 4, 4, 4
 ; VSX-NEXT:    addi 3, 3, .LCPI4_0 at toc@l
 ; VSX-NEXT:    xxlnor 1, 1, 1
 ; VSX-NEXT:    xxlnor 2, 2, 2
-; VSX-NEXT:    xvminsp 0, 34, 35
+; VSX-NEXT:    vcmpequw 5, 2, 4
 ; VSX-NEXT:    xxlor 1, 2, 1
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxsel 34, 0, 2, 1
+; VSX-NEXT:    xxsel 0, 0, 2, 1
+; VSX-NEXT:    xxlxor 2, 2, 2
+; VSX-NEXT:    xvcmpeqsp 2, 0, 2
+; VSX-NEXT:    xxsel 1, 0, 34, 37
+; VSX-NEXT:    vcmpequw 2, 3, 4
+; VSX-NEXT:    xxsel 1, 1, 35, 34
+; VSX-NEXT:    xxsel 34, 0, 1, 2
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v4f32_minimum:
@@ -315,12 +324,21 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) {
 ; AIX-NEXT:    xvcmpeqsp 1, 35, 35
 ; AIX-NEXT:    xvcmpeqsp 2, 34, 34
 ; AIX-NEXT:    ld 3, L..C4(2) # %const.0
+; AIX-NEXT:    xxleqv 36, 36, 36
 ; AIX-NEXT:    xvminsp 0, 34, 35
+; AIX-NEXT:    vslw 4, 4, 4
 ; AIX-NEXT:    xxlnor 1, 1, 1
 ; AIX-NEXT:    xxlnor 2, 2, 2
+; AIX-NEXT:    vcmpequw 5, 2, 4
 ; AIX-NEXT:    xxlor 1, 2, 1
 ; AIX-NEXT:    lxvw4x 2, 0, 3
-; AIX-NEXT:    xxsel 34, 0, 2, 1
+; AIX-NEXT:    xxsel 0, 0, 2, 1
+; AIX-NEXT:    xxlxor 2, 2, 2
+; AIX-NEXT:    xvcmpeqsp 2, 0, 2
+; AIX-NEXT:    xxsel 1, 0, 34, 37
+; AIX-NEXT:    vcmpequw 2, 3, 4
+; AIX-NEXT:    xxsel 1, 1, 35, 34
+; AIX-NEXT:    xxsel 34, 0, 1, 2
 ; AIX-NEXT:    blr
 entry:
   %m = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b)
@@ -359,9 +377,16 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
 ; VSX-NEXT:    xxlnor 1, 1, 1
 ; VSX-NEXT:    xxlnor 2, 2, 2
 ; VSX-NEXT:    xvmaxsp 0, 34, 35
+; VSX-NEXT:    xxlxor 36, 36, 36
+; VSX-NEXT:    vcmpequw 5, 2, 4
 ; VSX-NEXT:    xxlor 1, 2, 1
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxsel 34, 0, 2, 1
+; VSX-NEXT:    xxsel 0, 0, 2, 1
+; VSX-NEXT:    xvcmpeqsp 2, 0, 36
+; VSX-NEXT:    xxsel 1, 0, 34, 37
+; VSX-NEXT:    vcmpequw 2, 3, 4
+; VSX-NEXT:    xxsel 1, 1, 35, 34
+; VSX-NEXT:    xxsel 34, 0, 1, 2
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v4f32_maximum:
@@ -370,11 +395,18 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
 ; AIX-NEXT:    xvcmpeqsp 2, 34, 34
 ; AIX-NEXT:    ld 3, L..C5(2) # %const.0
 ; AIX-NEXT:    xvmaxsp 0, 34, 35
+; AIX-NEXT:    xxlxor 36, 36, 36
 ; AIX-NEXT:    xxlnor 1, 1, 1
 ; AIX-NEXT:    xxlnor 2, 2, 2
+; AIX-NEXT:    vcmpequw 5, 2, 4
 ; AIX-NEXT:    xxlor 1, 2, 1
 ; AIX-NEXT:    lxvw4x 2, 0, 3
-; AIX-NEXT:    xxsel 34, 0, 2, 1
+; AIX-NEXT:    xxsel 0, 0, 2, 1
+; AIX-NEXT:    xvcmpeqsp 2, 0, 36
+; AIX-NEXT:    xxsel 1, 0, 34, 37
+; AIX-NEXT:    vcmpequw 2, 3, 4
+; AIX-NEXT:    xxsel 1, 1, 35, 34
+; AIX-NEXT:    xxsel 34, 0, 1, 2
 ; AIX-NEXT:    blr
 entry:
   %m = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b)
@@ -461,28 +493,47 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) {
 ; VSX-LABEL: v2f64_minimum:
 ; VSX:       # %bb.0: # %entry
 ; VSX-NEXT:    addis 3, 2, .LCPI6_0 at toc@ha
-; VSX-NEXT:    xvmindp 0, 34, 35
-; VSX-NEXT:    xvcmpeqdp 35, 35, 35
+; VSX-NEXT:    xvcmpeqdp 36, 35, 35
+; VSX-NEXT:    xvcmpeqdp 37, 34, 34
 ; VSX-NEXT:    addi 3, 3, .LCPI6_0 at toc@l
-; VSX-NEXT:    xvcmpeqdp 34, 34, 34
-; VSX-NEXT:    xxlnor 35, 35, 35
-; VSX-NEXT:    xxlnor 34, 34, 34
+; VSX-NEXT:    xxlnor 36, 36, 36
+; VSX-NEXT:    xxlnor 37, 37, 37
+; VSX-NEXT:    xvmindp 0, 34, 35
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxlor 1, 34, 35
-; VSX-NEXT:    xxsel 34, 0, 2, 1
+; VSX-NEXT:    addis 3, 2, .LCPI6_1 at toc@ha
+; VSX-NEXT:    xxlor 1, 37, 36
+; VSX-NEXT:    addi 3, 3, .LCPI6_1 at toc@l
+; VSX-NEXT:    lxvd2x 36, 0, 3
+; VSX-NEXT:    vcmpequd 5, 2, 4
+; VSX-NEXT:    xxsel 0, 0, 2, 1
+; VSX-NEXT:    xxlxor 2, 2, 2
+; VSX-NEXT:    xxsel 1, 0, 34, 37
+; VSX-NEXT:    vcmpequd 2, 3, 4
+; VSX-NEXT:    xxsel 1, 1, 35, 34
+; VSX-NEXT:    xvcmpeqdp 34, 0, 2
+; VSX-NEXT:    xxsel 34, 0, 1, 34
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v2f64_minimum:
 ; AIX:       # %bb.0: # %entry
 ; AIX-NEXT:    ld 3, L..C6(2) # %const.0
-; AIX-NEXT:    xvmindp 0, 34, 35
-; AIX-NEXT:    xvcmpeqdp 35, 35, 35
+; AIX-NEXT:    xvcmpeqdp 36, 35, 35
+; AIX-NEXT:    xvcmpeqdp 37, 34, 34
 ; AIX-NEXT:    lxvd2x 2, 0, 3
-; AIX-NEXT:    xvcmpeqdp 34, 34, 34
-; AIX-NEXT:    xxlnor 35, 35, 35
-; AIX-NEXT:    xxlnor 34, 34, 34
-; AIX-NEXT:    xxlor 1, 34, 35
-; AIX-NEXT:    xxsel 34, 0, 2, 1
+; AIX-NEXT:    ld 3, L..C7(2) # %const.1
+; AIX-NEXT:    xxlnor 36, 36, 36
+; AIX-NEXT:    xxlnor 37, 37, 37
+; AIX-NEXT:    xvmindp 0, 34, 35
+; AIX-NEXT:    xxlor 1, 37, 36
+; AIX-NEXT:    lxvd2x 36, 0, 3
+; AIX-NEXT:    vcmpequd 5, 2, 4
+; AIX-NEXT:    xxsel 0, 0, 2, 1
+; AIX-NEXT:    xxlxor 2, 2, 2
+; AIX-NEXT:    xxsel 1, 0, 34, 37
+; AIX-NEXT:    vcmpequd 2, 3, 4
+; AIX-NEXT:    xxsel 1, 1, 35, 34
+; AIX-NEXT:    xvcmpeqdp 34, 0, 2
+; AIX-NEXT:    xxsel 34, 0, 1, 34
 ; AIX-NEXT:    blr
 entry:
   %m = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b)
@@ -567,28 +618,42 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) {
 ; VSX-LABEL: v2f64_maximum:
 ; VSX:       # %bb.0: # %entry
 ; VSX-NEXT:    addis 3, 2, .LCPI7_0 at toc@ha
-; VSX-NEXT:    xvmaxdp 0, 34, 35
-; VSX-NEXT:    xvcmpeqdp 35, 35, 35
+; VSX-NEXT:    xvcmpeqdp 36, 35, 35
+; VSX-NEXT:    xvcmpeqdp 37, 34, 34
 ; VSX-NEXT:    addi 3, 3, .LCPI7_0 at toc@l
-; VSX-NEXT:    xvcmpeqdp 34, 34, 34
-; VSX-NEXT:    xxlnor 35, 35, 35
-; VSX-NEXT:    xxlnor 34, 34, 34
+; VSX-NEXT:    xxlnor 36, 36, 36
+; VSX-NEXT:    xxlnor 37, 37, 37
+; VSX-NEXT:    xvmaxdp 0, 34, 35
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxlor 1, 34, 35
-; VSX-NEXT:    xxsel 34, 0, 2, 1
+; VSX-NEXT:    xxlor 1, 37, 36
+; VSX-NEXT:    xxlxor 36, 36, 36
+; VSX-NEXT:    vcmpequd 5, 2, 4
+; VSX-NEXT:    xxsel 0, 0, 2, 1
+; VSX-NEXT:    xxsel 1, 0, 34, 37
+; VSX-NEXT:    vcmpequd 2, 3, 4
+; VSX-NEXT:    xxsel 1, 1, 35, 34
+; VSX-NEXT:    xvcmpeqdp 34, 0, 36
+; VSX-NEXT:    xxsel 34, 0, 1, 34
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v2f64_maximum:
 ; AIX:       # %bb.0: # %entry
-; AIX-NEXT:    ld 3, L..C7(2) # %const.0
-; AIX-NEXT:    xvmaxdp 0, 34, 35
-; AIX-NEXT:    xvcmpeqdp 35, 35, 35
+; AIX-NEXT:    ld 3, L..C8(2) # %const.0
+; AIX-NEXT:    xvcmpeqdp 36, 35, 35
+; AIX-NEXT:    xvcmpeqdp 37, 34, 34
 ; AIX-NEXT:    lxvd2x 2, 0, 3
-; AIX-NEXT:    xvcmpeqdp 34, 34, 34
-; AIX-NEXT:    xxlnor 35, 35, 35
-; AIX-NEXT:    xxlnor 34, 34, 34
-; AIX-NEXT:    xxlor 1, 34, 35
-; AIX-NEXT:    xxsel 34, 0, 2, 1
+; AIX-NEXT:    xxlnor 36, 36, 36
+; AIX-NEXT:    xxlnor 37, 37, 37
+; AIX-NEXT:    xvmaxdp 0, 34, 35
+; AIX-NEXT:    xxlor 1, 37, 36
+; AIX-NEXT:    xxlxor 36, 36, 36
+; AIX-NEXT:    vcmpequd 5, 2, 4
+; AIX-NEXT:    xxsel 0, 0, 2, 1
+; AIX-NEXT:    xxsel 1, 0, 34, 37
+; AIX-NEXT:    vcmpequd 2, 3, 4
+; AIX-NEXT:    xxsel 1, 1, 35, 34
+; AIX-NEXT:    xvcmpeqdp 34, 0, 36
+; AIX-NEXT:    xxsel 34, 0, 1, 34
 ; AIX-NEXT:    blr
 entry:
   %m = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b)
diff --git a/llvm/test/CodeGen/PowerPC/scalar-min-max.ll b/llvm/test/CodeGen/PowerPC/scalar-min-max.ll
index f6ea0d9cc2328..216d498e85411 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-min-max.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-min-max.ll
@@ -117,12 +117,13 @@ define dso_local float @testfmax_fast(float %a, float %b) local_unnamed_addr {
 ;
 ; NO-FAST-P9-LABEL: testfmax_fast:
 ; NO-FAST-P9:       # %bb.0: # %entry
-; NO-FAST-P9-NEXT:    xsmaxdp f1, f1, f2
+; NO-FAST-P9-NEXT:    xsmaxcdp f1, f1, f2
 ; NO-FAST-P9-NEXT:    blr
 ;
 ; NO-FAST-P8-LABEL: testfmax_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xsmaxdp f1, f1, f2
+; NO-FAST-P8-NEXT:    xssubsp f0, f2, f1
+; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf ogt float %a, %b
@@ -137,12 +138,13 @@ define dso_local double @testdmax_fast(double %a, double %b) local_unnamed_addr
 ;
 ; NO-FAST-P9-LABEL: testdmax_fast:
 ; NO-FAST-P9:       # %bb.0: # %entry
-; NO-FAST-P9-NEXT:    xsmaxdp f1, f1, f2
+; NO-FAST-P9-NEXT:    xsmaxcdp f1, f1, f2
 ; NO-FAST-P9-NEXT:    blr
 ;
 ; NO-FAST-P8-LABEL: testdmax_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xsmaxdp f1, f1, f2
+; NO-FAST-P8-NEXT:    xssubdp f0, f2, f1
+; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf ogt double %a, %b
@@ -157,12 +159,13 @@ define dso_local float @testfmin_fast(float %a, float %b) local_unnamed_addr {
 ;
 ; NO-FAST-P9-LABEL: testfmin_fast:
 ; NO-FAST-P9:       # %bb.0: # %entry
-; NO-FAST-P9-NEXT:    xsmindp f1, f1, f2
+; NO-FAST-P9-NEXT:    xsmincdp f1, f1, f2
 ; NO-FAST-P9-NEXT:    blr
 ;
 ; NO-FAST-P8-LABEL: testfmin_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xsmindp f1, f1, f2
+; NO-FAST-P8-NEXT:    xssubsp f0, f1, f2
+; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf olt float %a, %b
@@ -177,12 +180,13 @@ define dso_local double @testdmin_fast(double %a, double %b) local_unnamed_addr
 ;
 ; NO-FAST-P9-LABEL: testdmin_fast:
 ; NO-FAST-P9:       # %bb.0: # %entry
-; NO-FAST-P9-NEXT:    xsmindp f1, f1, f2
+; NO-FAST-P9-NEXT:    xsmincdp f1, f1, f2
 ; NO-FAST-P9-NEXT:    blr
 ;
 ; NO-FAST-P8-LABEL: testdmin_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xsmindp f1, f1, f2
+; NO-FAST-P8-NEXT:    xssubdp f0, f1, f2
+; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf olt double %a, %b

>From 682b44320a6e1dec5efc9f87c39b7df2823bc2f5 Mon Sep 17 00:00:00 2001
From: YunQiang Su <yunqiang at isrc.iscas.ac.cn>
Date: Tue, 15 Apr 2025 11:07:13 +0800
Subject: [PATCH 3/3] PowerPC/VSX: Select FMINNUM and FMAXNUM

In LangRef, we claim that FMINNUM and FMAXNUM should follow the
minNum and maxNum operators in IEEE754-2008.

PowerPC/VSX does have such instructions: XSMINDP and XSMAXDP.
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp |  4 ++++
 llvm/lib/Target/PowerPC/PPCInstrVSX.td      |  8 ++++++++
 llvm/test/CodeGen/PowerPC/scalar-min-max.ll | 20 ++++++++------------
 3 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1f75425752a78..0ee93b5e8012c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -777,6 +777,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
+    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
   }
 
   if (Subtarget.hasAltivec()) {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 19448210f5db1..695e28bf1493c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2731,6 +2731,8 @@ def : Pat<(f32 (fneg (fabs f32:$S))),
                (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
 
 // f32 Min.
+def : Pat<(f32 (fminnum f32:$A, f32:$B)),
+          (f32 FpMinMax.F32Min)>;
 def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
           (f32 FpMinMax.F32Min)>;
 def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
@@ -2742,6 +2744,8 @@ def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
 // F32 Max.
 def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
           (f32 FpMinMax.F32Max)>;
+def : Pat<(f32 (fmaxnum f32:$A, f32:$B)),
+          (f32 FpMinMax.F32Max)>;
 def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
           (f32 FpMinMax.F32Max)>;
 def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
@@ -2750,6 +2754,8 @@ def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
           (f32 FpMinMax.F32Max)>;
 
 // f64 Min.
+def : Pat<(f64 (fminnum f64:$A, f64:$B)),
+          (f64 (XSMINDP $A, $B))>;
 def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
           (f64 (XSMINDP $A, $B))>;
 def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
@@ -2759,6 +2765,8 @@ def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
 def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
           (f64 (XSMINDP $A, $B))>;
 // f64 Max.
+def : Pat<(f64 (fmaxnum f64:$A, f64:$B)),
+          (f64 (XSMAXDP $A, $B))>;
 def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
           (f64 (XSMAXDP $A, $B))>;
 def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
diff --git a/llvm/test/CodeGen/PowerPC/scalar-min-max.ll b/llvm/test/CodeGen/PowerPC/scalar-min-max.ll
index 216d498e85411..f6ea0d9cc2328 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-min-max.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-min-max.ll
@@ -117,13 +117,12 @@ define dso_local float @testfmax_fast(float %a, float %b) local_unnamed_addr {
 ;
 ; NO-FAST-P9-LABEL: testfmax_fast:
 ; NO-FAST-P9:       # %bb.0: # %entry
-; NO-FAST-P9-NEXT:    xsmaxcdp f1, f1, f2
+; NO-FAST-P9-NEXT:    xsmaxdp f1, f1, f2
 ; NO-FAST-P9-NEXT:    blr
 ;
 ; NO-FAST-P8-LABEL: testfmax_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xssubsp f0, f2, f1
-; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
+; NO-FAST-P8-NEXT:    xsmaxdp f1, f1, f2
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf ogt float %a, %b
@@ -138,13 +137,12 @@ define dso_local double @testdmax_fast(double %a, double %b) local_unnamed_addr
 ;
 ; NO-FAST-P9-LABEL: testdmax_fast:
 ; NO-FAST-P9:       # %bb.0: # %entry
-; NO-FAST-P9-NEXT:    xsmaxcdp f1, f1, f2
+; NO-FAST-P9-NEXT:    xsmaxdp f1, f1, f2
 ; NO-FAST-P9-NEXT:    blr
 ;
 ; NO-FAST-P8-LABEL: testdmax_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xssubdp f0, f2, f1
-; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
+; NO-FAST-P8-NEXT:    xsmaxdp f1, f1, f2
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf ogt double %a, %b
@@ -159,13 +157,12 @@ define dso_local float @testfmin_fast(float %a, float %b) local_unnamed_addr {
 ;
 ; NO-FAST-P9-LABEL: testfmin_fast:
 ; NO-FAST-P9:       # %bb.0: # %entry
-; NO-FAST-P9-NEXT:    xsmincdp f1, f1, f2
+; NO-FAST-P9-NEXT:    xsmindp f1, f1, f2
 ; NO-FAST-P9-NEXT:    blr
 ;
 ; NO-FAST-P8-LABEL: testfmin_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xssubsp f0, f1, f2
-; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
+; NO-FAST-P8-NEXT:    xsmindp f1, f1, f2
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf olt float %a, %b
@@ -180,13 +177,12 @@ define dso_local double @testdmin_fast(double %a, double %b) local_unnamed_addr
 ;
 ; NO-FAST-P9-LABEL: testdmin_fast:
 ; NO-FAST-P9:       # %bb.0: # %entry
-; NO-FAST-P9-NEXT:    xsmincdp f1, f1, f2
+; NO-FAST-P9-NEXT:    xsmindp f1, f1, f2
 ; NO-FAST-P9-NEXT:    blr
 ;
 ; NO-FAST-P8-LABEL: testdmin_fast:
 ; NO-FAST-P8:       # %bb.0: # %entry
-; NO-FAST-P8-NEXT:    xssubdp f0, f1, f2
-; NO-FAST-P8-NEXT:    fsel f1, f0, f2, f1
+; NO-FAST-P8-NEXT:    xsmindp f1, f1, f2
 ; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf olt double %a, %b