[llvm] c6f45f5 - PowerPC/VSX: Select FMINNUM and FMAXNUM (#135739)

Sun Dec 7 21:18:56 PST 2025

Author: YunQiang Su
Date: 2025-12-08T13:18:52+08:00
New Revision: c6f45f51fbb93fd6d38f2a22e61053831fc553d4

URL: https://github.com/llvm/llvm-project/commit/c6f45f51fbb93fd6d38f2a22e61053831fc553d4
DIFF: https://github.com/llvm/llvm-project/commit/c6f45f51fbb93fd6d38f2a22e61053831fc553d4.diff

LOG: PowerPC/VSX: Select FMINNUM and FMAXNUM (#135739)

In LangRef, we claim that FMINNUM and FMAXNUM should follow the minNum
and maxNum operators in IEEE754-2008.

PowerPC/VSX does have these instructions XSMINDP and XSMAXDP.

Now we use FMINNUM_IEEE and FMAXNUM_IEEE, since they are used by the
non-arch expand codes now.
In future, we may replace all FMINNUM_IEEE/FMAXNUM_IEEE with FMINNUM and
FMAXNUM.

---------

Co-authored-by: Your Name <you at example.com>

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
    llvm/test/CodeGen/PowerPC/scalar-min-max.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1c311d5480883..b795f61681ef8 100644

--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -794,6 +794,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
+    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
     setOperationAction(ISD::FCANONICALIZE, MVT::f64, Legal);
     setOperationAction(ISD::FCANONICALIZE, MVT::f32, Legal);
   }
@@ -828,8 +832,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       }
 
       if (Subtarget.hasVSX()) {
+        setOperationAction(ISD::FMAXNUM_IEEE, VT, Legal);
+        setOperationAction(ISD::FMINNUM_IEEE, VT, Legal);
         setOperationAction(ISD::FMAXNUM, VT, Legal);
         setOperationAction(ISD::FMINNUM, VT, Legal);
+        setOperationAction(ISD::FCANONICALIZE, VT, Legal);
       }
 
       // Vector instructions introduced in P8

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index d72201df5b002..ed4b58fef3a26 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2757,14 +2757,26 @@ def : Pat<(v1i128 (vselect v1i128:$vA, v1i128:$vB, v1i128:$vC)),
                         (COPY_TO_REGCLASS $vB, VSRC),
                         (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
 
+def : Pat<(v4f32 (fmaxnum_ieee v4f32:$src1, v4f32:$src2)),
+          (v4f32 (XVMAXSP $src1, $src2))>;
 def : Pat<(v4f32 (any_fmaxnum v4f32:$src1, v4f32:$src2)),
           (v4f32 (XVMAXSP $src1, $src2))>;
+def : Pat<(v4f32 (fminnum_ieee v4f32:$src1, v4f32:$src2)),
+          (v4f32 (XVMINSP $src1, $src2))>;
 def : Pat<(v4f32 (any_fminnum v4f32:$src1, v4f32:$src2)),
           (v4f32 (XVMINSP $src1, $src2))>;
+def : Pat<(v4f32 (fcanonicalize v4f32:$src1)),
+          (v4f32 (XVMAXSP $src1, $src1))>;
+def : Pat<(v2f64 (fmaxnum_ieee v2f64:$src1, v2f64:$src2)),
+          (v2f64 (XVMAXDP $src1, $src2))>;
 def : Pat<(v2f64 (any_fmaxnum v2f64:$src1, v2f64:$src2)),
           (v2f64 (XVMAXDP $src1, $src2))>;
+def : Pat<(v2f64 (fminnum_ieee v2f64:$src1, v2f64:$src2)),
+          (v2f64 (XVMINDP $src1, $src2))>;
 def : Pat<(v2f64 (any_fminnum v2f64:$src1, v2f64:$src2)),
           (v2f64 (XVMINDP $src1, $src2))>;
+def : Pat<(v2f64 (fcanonicalize v2f64:$src1)),
+          (v2f64 (XVMAXDP $src1, $src1))>;
 
 // f32 abs
 def : Pat<(f32 (fabs f32:$S)),
@@ -2776,43 +2788,29 @@ def : Pat<(f32 (fneg (fabs f32:$S))),
           (f32 (COPY_TO_REGCLASS (XSNABSDP
                (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
 
-// f32 Min.
+// Max and Min
 def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
           (f32 FpMinMax.F32Min)>;
-def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
-          (f32 FpMinMax.F32Min)>;
-def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))),
-          (f32 FpMinMax.F32Min)>;
-def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
-          (f32 FpMinMax.F32Min)>;
-// F32 Max.
 def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
           (f32 FpMinMax.F32Max)>;
-def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
-          (f32 FpMinMax.F32Max)>;
-def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
-          (f32 FpMinMax.F32Max)>;
-def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
+def : Pat<(f32 (fminnum f32:$A, f32:$B)),
+          (f32 FpMinMax.F32Min)>;
+def : Pat<(f32 (fmaxnum f32:$A, f32:$B)),
           (f32 FpMinMax.F32Max)>;
-
-// f64 Min.
+def : Pat<(f32 (fcanonicalize f32:$A)),
+          (f32 (COPY_TO_REGCLASS (XSMAXDP
+               (COPY_TO_REGCLASS $A, VSFRC),
+               (COPY_TO_REGCLASS $A, VSFRC)), VSSRC))>;
 def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
           (f64 (XSMINDP $A, $B))>;
-def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
-          (f64 (XSMINDP $A, $B))>;
-def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
-          (f64 (XSMINDP $A, $B))>;
-def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
-          (f64 (XSMINDP $A, $B))>;
-// f64 Max.
 def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
           (f64 (XSMAXDP $A, $B))>;
-def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
-          (f64 (XSMAXDP $A, $B))>;
-def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))),
-          (f64 (XSMAXDP $A, $B))>;
-def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
+def : Pat<(f64 (fminnum f64:$A, f64:$B)),
+          (f64 (XSMINDP $A, $B))>;
+def : Pat<(f64 (fmaxnum f64:$A, f64:$B)),
           (f64 (XSMAXDP $A, $B))>;
+def : Pat<(f64 (fcanonicalize f64:$A)),
+          (f64 (XSMAXDP $A, $A))>;
 
 def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, ForceXForm:$dst),
             (STXVD2X $rS, ForceXForm:$dst)>;

diff  --git a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
index a99c25a4e4479..39cf136e10d77 100644
--- a/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/PowerPC/fminimum-fmaximum.ll
@@ -301,22 +301,13 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) {
 ; VSX-NEXT:    xvcmpeqsp 1, 35, 35
 ; VSX-NEXT:    xvcmpeqsp 2, 34, 34
 ; VSX-NEXT:    addis 3, 2, .LCPI4_0 at toc@ha
-; VSX-NEXT:    xxleqv 36, 36, 36
-; VSX-NEXT:    xvminsp 0, 34, 35
-; VSX-NEXT:    vslw 4, 4, 4
 ; VSX-NEXT:    addi 3, 3, .LCPI4_0 at toc@l
 ; VSX-NEXT:    xxlnor 1, 1, 1
 ; VSX-NEXT:    xxlnor 2, 2, 2
-; VSX-NEXT:    vcmpequw 5, 2, 4
+; VSX-NEXT:    xvminsp 0, 34, 35
 ; VSX-NEXT:    xxlor 1, 2, 1
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxsel 0, 0, 2, 1
-; VSX-NEXT:    xxlxor 2, 2, 2
-; VSX-NEXT:    xvcmpeqsp 2, 0, 2
-; VSX-NEXT:    xxsel 1, 0, 34, 37
-; VSX-NEXT:    vcmpequw 2, 3, 4
-; VSX-NEXT:    xxsel 1, 1, 35, 34
-; VSX-NEXT:    xxsel 34, 0, 1, 2
+; VSX-NEXT:    xxsel 34, 0, 2, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v4f32_minimum:
@@ -324,21 +315,12 @@ define <4 x float> @v4f32_minimum(<4 x float> %a, <4 x float> %b) {
 ; AIX-NEXT:    xvcmpeqsp 1, 35, 35
 ; AIX-NEXT:    xvcmpeqsp 2, 34, 34
 ; AIX-NEXT:    ld 3, L..C4(2) # %const.0
-; AIX-NEXT:    xxleqv 36, 36, 36
 ; AIX-NEXT:    xvminsp 0, 34, 35
-; AIX-NEXT:    vslw 4, 4, 4
 ; AIX-NEXT:    xxlnor 1, 1, 1
 ; AIX-NEXT:    xxlnor 2, 2, 2
-; AIX-NEXT:    vcmpequw 5, 2, 4
 ; AIX-NEXT:    xxlor 1, 2, 1
 ; AIX-NEXT:    lxvw4x 2, 0, 3
-; AIX-NEXT:    xxsel 0, 0, 2, 1
-; AIX-NEXT:    xxlxor 2, 2, 2
-; AIX-NEXT:    xvcmpeqsp 2, 0, 2
-; AIX-NEXT:    xxsel 1, 0, 34, 37
-; AIX-NEXT:    vcmpequw 2, 3, 4
-; AIX-NEXT:    xxsel 1, 1, 35, 34
-; AIX-NEXT:    xxsel 34, 0, 1, 2
+; AIX-NEXT:    xxsel 34, 0, 2, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b)
@@ -377,16 +359,9 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
 ; VSX-NEXT:    xxlnor 1, 1, 1
 ; VSX-NEXT:    xxlnor 2, 2, 2
 ; VSX-NEXT:    xvmaxsp 0, 34, 35
-; VSX-NEXT:    xxlxor 36, 36, 36
-; VSX-NEXT:    vcmpequw 5, 2, 4
 ; VSX-NEXT:    xxlor 1, 2, 1
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxsel 0, 0, 2, 1
-; VSX-NEXT:    xvcmpeqsp 2, 0, 36
-; VSX-NEXT:    xxsel 1, 0, 34, 37
-; VSX-NEXT:    vcmpequw 2, 3, 4
-; VSX-NEXT:    xxsel 1, 1, 35, 34
-; VSX-NEXT:    xxsel 34, 0, 1, 2
+; VSX-NEXT:    xxsel 34, 0, 2, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v4f32_maximum:
@@ -395,18 +370,11 @@ define <4 x float> @v4f32_maximum(<4 x float> %a, <4 x float> %b) {
 ; AIX-NEXT:    xvcmpeqsp 2, 34, 34
 ; AIX-NEXT:    ld 3, L..C5(2) # %const.0
 ; AIX-NEXT:    xvmaxsp 0, 34, 35
-; AIX-NEXT:    xxlxor 36, 36, 36
 ; AIX-NEXT:    xxlnor 1, 1, 1
 ; AIX-NEXT:    xxlnor 2, 2, 2
-; AIX-NEXT:    vcmpequw 5, 2, 4
 ; AIX-NEXT:    xxlor 1, 2, 1
 ; AIX-NEXT:    lxvw4x 2, 0, 3
-; AIX-NEXT:    xxsel 0, 0, 2, 1
-; AIX-NEXT:    xvcmpeqsp 2, 0, 36
-; AIX-NEXT:    xxsel 1, 0, 34, 37
-; AIX-NEXT:    vcmpequw 2, 3, 4
-; AIX-NEXT:    xxsel 1, 1, 35, 34
-; AIX-NEXT:    xxsel 34, 0, 1, 2
+; AIX-NEXT:    xxsel 34, 0, 2, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b)
@@ -493,47 +461,28 @@ define <2 x double> @v2f64_minimum(<2 x double> %a, <2 x double> %b) {
 ; VSX-LABEL: v2f64_minimum:
 ; VSX:       # %bb.0: # %entry
 ; VSX-NEXT:    addis 3, 2, .LCPI6_0 at toc@ha
-; VSX-NEXT:    xvcmpeqdp 36, 35, 35
-; VSX-NEXT:    xvcmpeqdp 37, 34, 34
-; VSX-NEXT:    addi 3, 3, .LCPI6_0 at toc@l
-; VSX-NEXT:    xxlnor 36, 36, 36
-; VSX-NEXT:    xxlnor 37, 37, 37
 ; VSX-NEXT:    xvmindp 0, 34, 35
+; VSX-NEXT:    xvcmpeqdp 35, 35, 35
+; VSX-NEXT:    addi 3, 3, .LCPI6_0 at toc@l
+; VSX-NEXT:    xvcmpeqdp 34, 34, 34
+; VSX-NEXT:    xxlnor 35, 35, 35
+; VSX-NEXT:    xxlnor 34, 34, 34
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    addis 3, 2, .LCPI6_1 at toc@ha
-; VSX-NEXT:    xxlor 1, 37, 36
-; VSX-NEXT:    addi 3, 3, .LCPI6_1 at toc@l
-; VSX-NEXT:    lxvd2x 36, 0, 3
-; VSX-NEXT:    vcmpequd 5, 2, 4
-; VSX-NEXT:    xxsel 0, 0, 2, 1
-; VSX-NEXT:    xxlxor 2, 2, 2
-; VSX-NEXT:    xxsel 1, 0, 34, 37
-; VSX-NEXT:    vcmpequd 2, 3, 4
-; VSX-NEXT:    xxsel 1, 1, 35, 34
-; VSX-NEXT:    xvcmpeqdp 34, 0, 2
-; VSX-NEXT:    xxsel 34, 0, 1, 34
+; VSX-NEXT:    xxlor 1, 34, 35
+; VSX-NEXT:    xxsel 34, 0, 2, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v2f64_minimum:
 ; AIX:       # %bb.0: # %entry
 ; AIX-NEXT:    ld 3, L..C6(2) # %const.0
-; AIX-NEXT:    xvcmpeqdp 36, 35, 35
-; AIX-NEXT:    xvcmpeqdp 37, 34, 34
-; AIX-NEXT:    lxvd2x 2, 0, 3
-; AIX-NEXT:    ld 3, L..C7(2) # %const.1
-; AIX-NEXT:    xxlnor 36, 36, 36
-; AIX-NEXT:    xxlnor 37, 37, 37
 ; AIX-NEXT:    xvmindp 0, 34, 35
-; AIX-NEXT:    xxlor 1, 37, 36
-; AIX-NEXT:    lxvd2x 36, 0, 3
-; AIX-NEXT:    vcmpequd 5, 2, 4
-; AIX-NEXT:    xxsel 0, 0, 2, 1
-; AIX-NEXT:    xxlxor 2, 2, 2
-; AIX-NEXT:    xxsel 1, 0, 34, 37
-; AIX-NEXT:    vcmpequd 2, 3, 4
-; AIX-NEXT:    xxsel 1, 1, 35, 34
-; AIX-NEXT:    xvcmpeqdp 34, 0, 2
-; AIX-NEXT:    xxsel 34, 0, 1, 34
+; AIX-NEXT:    xvcmpeqdp 35, 35, 35
+; AIX-NEXT:    lxvd2x 2, 0, 3
+; AIX-NEXT:    xvcmpeqdp 34, 34, 34
+; AIX-NEXT:    xxlnor 35, 35, 35
+; AIX-NEXT:    xxlnor 34, 34, 34
+; AIX-NEXT:    xxlor 1, 34, 35
+; AIX-NEXT:    xxsel 34, 0, 2, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b)
@@ -618,42 +567,28 @@ define <2 x double> @v2f64_maximum(<2 x double> %a, <2 x double> %b) {
 ; VSX-LABEL: v2f64_maximum:
 ; VSX:       # %bb.0: # %entry
 ; VSX-NEXT:    addis 3, 2, .LCPI7_0 at toc@ha
-; VSX-NEXT:    xvcmpeqdp 36, 35, 35
-; VSX-NEXT:    xvcmpeqdp 37, 34, 34
-; VSX-NEXT:    addi 3, 3, .LCPI7_0 at toc@l
-; VSX-NEXT:    xxlnor 36, 36, 36
-; VSX-NEXT:    xxlnor 37, 37, 37
 ; VSX-NEXT:    xvmaxdp 0, 34, 35
+; VSX-NEXT:    xvcmpeqdp 35, 35, 35
+; VSX-NEXT:    addi 3, 3, .LCPI7_0 at toc@l
+; VSX-NEXT:    xvcmpeqdp 34, 34, 34
+; VSX-NEXT:    xxlnor 35, 35, 35
+; VSX-NEXT:    xxlnor 34, 34, 34
 ; VSX-NEXT:    lxvd2x 2, 0, 3
-; VSX-NEXT:    xxlor 1, 37, 36
-; VSX-NEXT:    xxlxor 36, 36, 36
-; VSX-NEXT:    vcmpequd 5, 2, 4
-; VSX-NEXT:    xxsel 0, 0, 2, 1
-; VSX-NEXT:    xxsel 1, 0, 34, 37
-; VSX-NEXT:    vcmpequd 2, 3, 4
-; VSX-NEXT:    xxsel 1, 1, 35, 34
-; VSX-NEXT:    xvcmpeqdp 34, 0, 36
-; VSX-NEXT:    xxsel 34, 0, 1, 34
+; VSX-NEXT:    xxlor 1, 34, 35
+; VSX-NEXT:    xxsel 34, 0, 2, 1
 ; VSX-NEXT:    blr
 ;
 ; AIX-LABEL: v2f64_maximum:
 ; AIX:       # %bb.0: # %entry
-; AIX-NEXT:    ld 3, L..C8(2) # %const.0
-; AIX-NEXT:    xvcmpeqdp 36, 35, 35
-; AIX-NEXT:    xvcmpeqdp 37, 34, 34
-; AIX-NEXT:    lxvd2x 2, 0, 3
-; AIX-NEXT:    xxlnor 36, 36, 36
-; AIX-NEXT:    xxlnor 37, 37, 37
+; AIX-NEXT:    ld 3, L..C7(2) # %const.0
 ; AIX-NEXT:    xvmaxdp 0, 34, 35
-; AIX-NEXT:    xxlor 1, 37, 36
-; AIX-NEXT:    xxlxor 36, 36, 36
-; AIX-NEXT:    vcmpequd 5, 2, 4
-; AIX-NEXT:    xxsel 0, 0, 2, 1
-; AIX-NEXT:    xxsel 1, 0, 34, 37
-; AIX-NEXT:    vcmpequd 2, 3, 4
-; AIX-NEXT:    xxsel 1, 1, 35, 34
-; AIX-NEXT:    xvcmpeqdp 34, 0, 36
-; AIX-NEXT:    xxsel 34, 0, 1, 34
+; AIX-NEXT:    xvcmpeqdp 35, 35, 35
+; AIX-NEXT:    lxvd2x 2, 0, 3
+; AIX-NEXT:    xvcmpeqdp 34, 34, 34
+; AIX-NEXT:    xxlnor 35, 35, 35
+; AIX-NEXT:    xxlnor 34, 34, 34
+; AIX-NEXT:    xxlor 1, 34, 35
+; AIX-NEXT:    xxsel 34, 0, 2, 1
 ; AIX-NEXT:    blr
 entry:
   %m = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b)

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-min-max.ll b/llvm/test/CodeGen/PowerPC/scalar-min-max.ll
index 5f637e3ecddd3..7feeba8b99d5b 100644
--- a/llvm/test/CodeGen/PowerPC/scalar-min-max.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar-min-max.ll
@@ -5,6 +5,7 @@
 ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -verify-machineinstrs \
 ; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
 ; RUN:   --check-prefix=P8
+
 define dso_local float @testfmax(float %a, float %b) local_unnamed_addr {
 ; P9-LABEL: testfmax:
 ; P9:       # %bb.0: # %entry
@@ -24,19 +25,19 @@ entry:
   ret float %cond
 }
 
-define dso_local double @testdmax(double %a, double %b) local_unnamed_addr {
-; P9-LABEL: testdmax:
-; P9:       # %bb.0: # %entry
-; P9-NEXT:    xsmaxcdp f1, f1, f2
-; P9-NEXT:    blr
+define double @testdmax(double %a, double %b) local_unnamed_addr {
+; NO-FAST-P9-LABEL: testdmax:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmaxcdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
 ;
-; P8-LABEL: testdmax:
-; P8:       # %bb.0: # %entry
-; P8-NEXT:    xscmpudp cr0, f1, f2
-; P8-NEXT:    bgtlr cr0
-; P8-NEXT:  # %bb.1: # %entry
-; P8-NEXT:    fmr f1, f2
-; P8-NEXT:    blr
+; NO-FAST-P8-LABEL: testdmax:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xscmpudp cr0, f1, f2
+; NO-FAST-P8-NEXT:    bgtlr cr0
+; NO-FAST-P8-NEXT:  # %bb.1: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp ogt double %a, %b
   %cond = select i1 %cmp, double %a, double %b
@@ -82,62 +83,242 @@ entry:
 }
 
 define dso_local float @testfmax_fast(float %a, float %b) local_unnamed_addr {
-; P9-LABEL: testfmax_fast:
-; P9:       # %bb.0: # %entry
-; P9-NEXT:    xsmaxdp f1, f1, f2
-; P9-NEXT:    blr
+; NO-FAST-P9-LABEL: testfmax_fast:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmaxdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
 ;
-; P8-LABEL: testfmax_fast:
-; P8:       # %bb.0: # %entry
-; P8-NEXT:    xsmaxdp f1, f1, f2
-; P8-NEXT:    blr
+; NO-FAST-P8-LABEL: testfmax_fast:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xsmaxdp f1, f1, f2
+; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf ogt float %a, %b
   %cond = select nnan nsz i1 %cmp, float %a, float %b
   ret float %cond
 }
 define dso_local double @testdmax_fast(double %a, double %b) local_unnamed_addr {
-; P9-LABEL: testdmax_fast:
-; P9:       # %bb.0: # %entry
-; P9-NEXT:    xsmaxdp f1, f1, f2
-; P9-NEXT:    blr
+; NO-FAST-P9-LABEL: testdmax_fast:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmaxdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
 ;
-; P8-LABEL: testdmax_fast:
-; P8:       # %bb.0: # %entry
-; P8-NEXT:    xsmaxdp f1, f1, f2
-; P8-NEXT:    blr
+; NO-FAST-P8-LABEL: testdmax_fast:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xsmaxdp f1, f1, f2
+; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf ogt double %a, %b
   %cond = select nnan nsz i1 %cmp, double %a, double %b
   ret double %cond
 }
 define dso_local float @testfmin_fast(float %a, float %b) local_unnamed_addr {
-; P9-LABEL: testfmin_fast:
-; P9:       # %bb.0: # %entry
-; P9-NEXT:    xsmindp f1, f1, f2
-; P9-NEXT:    blr
+; NO-FAST-P9-LABEL: testfmin_fast:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmindp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
 ;
-; P8-LABEL: testfmin_fast:
-; P8:       # %bb.0: # %entry
-; P8-NEXT:    xsmindp f1, f1, f2
-; P8-NEXT:    blr
+; NO-FAST-P8-LABEL: testfmin_fast:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xsmindp f1, f1, f2
+; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf olt float %a, %b
   %cond = select nnan nsz i1 %cmp, float %a, float %b
   ret float %cond
 }
 define dso_local double @testdmin_fast(double %a, double %b) local_unnamed_addr {
-; P9-LABEL: testdmin_fast:
-; P9:       # %bb.0: # %entry
-; P9-NEXT:    xsmindp f1, f1, f2
-; P9-NEXT:    blr
+; NO-FAST-P9-LABEL: testdmin_fast:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmindp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
 ;
-; P8-LABEL: testdmin_fast:
-; P8:       # %bb.0: # %entry
-; P8-NEXT:    xsmindp f1, f1, f2
-; P8-NEXT:    blr
+; NO-FAST-P8-LABEL: testdmin_fast:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xsmindp f1, f1, f2
+; NO-FAST-P8-NEXT:    blr
 entry:
   %cmp = fcmp nnan ninf olt double %a, %b
   %cond = select nnan nsz i1 %cmp, double %a, double %b
   ret double %cond
 }
+
+define float @testfminnum(float %a, float %b) {
+; NO-FAST-P9-LABEL: testfminnum:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmindp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testfminnum:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xsmindp f1, f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %0 = tail call float @llvm.minnum.f32(float %a, float %b)
+  ret float %0
+}
+
+define float @testfmaxnum(float %a, float %b) {
+; NO-FAST-P9-LABEL: testfmaxnum:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmaxdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testfmaxnum:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xsmaxdp f1, f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %0 = tail call float @llvm.maxnum.f32(float %a, float %b)
+  ret float %0
+}
+
+define float @testfcanonicalize(float %a) {
+; NO-FAST-P9-LABEL: testfcanonicalize:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmaxdp f1, f1, f1
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testfcanonicalize:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xsmaxdp f1, f1, f1
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %canonicalize = tail call float @llvm.canonicalize.f32(float %a)
+  ret float %canonicalize
+}
+
+define double @testdminnum(double %a, double %b) {
+; NO-FAST-P9-LABEL: testdminnum:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmindp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testdminnum:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xsmindp f1, f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %0 = tail call double @llvm.minnum.f64(double %a, double %b)
+  ret double %0
+}
+
+define double @testdmaxnum(double %a, double %b) {
+; NO-FAST-P9-LABEL: testdmaxnum:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmaxdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testdmaxnum:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xsmaxdp f1, f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %0 = tail call double @llvm.maxnum.f64(double %a, double %b)
+  ret double %0
+}
+
+define double @testdcanonicalize(double %a) {
+; NO-FAST-P9-LABEL: testdcanonicalize:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmaxdp f1, f1, f1
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testdcanonicalize:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xsmaxdp f1, f1, f1
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %canonicalize = tail call double @llvm.canonicalize.f64(double %a)
+  ret double %canonicalize
+}
+
+define <4 x float> @testfminnum_v4f32(<4 x float> %a, <4 x float> %b) {
+; NO-FAST-P9-LABEL: testfminnum_v4f32:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xvminsp vs34, vs34, vs35
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testfminnum_v4f32:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xvminsp vs34, vs34, vs35
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %0 = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testfmaxnum_v4f32(<4 x float> %a, <4 x float> %b) {
+; NO-FAST-P9-LABEL: testfmaxnum_v4f32:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xvmaxsp vs34, vs34, vs35
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testfmaxnum_v4f32:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xvmaxsp vs34, vs34, vs35
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %0 = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testfcanonicalize_v4f32(<4 x float> %a) {
+; NO-FAST-P9-LABEL: testfcanonicalize_v4f32:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xvmaxsp vs34, vs34, vs34
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testfcanonicalize_v4f32:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xvmaxsp vs34, vs34, vs34
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %0 = tail call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %a)
+  ret <4 x float> %0
+}
+
+define <2 x double> @testdminnum_v2f64(<2 x double> %a, <2 x double> %b) {
+; NO-FAST-P9-LABEL: testdminnum_v2f64:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xvmindp vs34, vs34, vs35
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testdminnum_v2f64:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xvmindp vs34, vs34, vs35
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %0 = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b)
+  ret <2 x double> %0
+}
+
+define <2 x double> @testdmaxnum_v2f64(<2 x double> %a, <2 x double> %b) {
+; NO-FAST-P9-LABEL: testdmaxnum_v2f64:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xvmaxdp vs34, vs34, vs35
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testdmaxnum_v2f64:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xvmaxdp vs34, vs34, vs35
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %0 = tail call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b)
+  ret <2 x double> %0
+}
+
+define <2 x double> @testdcanonicalize_v2f64(<2 x double> %a) {
+; NO-FAST-P9-LABEL: testdcanonicalize_v2f64:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xvmaxdp vs34, vs34, vs34
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testdcanonicalize_v2f64:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xvmaxdp vs34, vs34, vs34
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %0 = tail call <2 x double> @llvm.canonicalize.v2f64(<2 x double> %a)
+  ret <2 x double> %0
+}