[llvm] 25a41ad - [PowerPC] Emit scalar fp min/max instructions

Nemanja Ivanovic via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 28 17:14:08 PDT 2019


Author: Nemanja Ivanovic
Date: 2019-10-28T19:13:33-05:00
New Revision: 25a41ad242000520629a274e83db1ea884d1c1e7

URL: https://github.com/llvm/llvm-project/commit/25a41ad242000520629a274e83db1ea884d1c1e7
DIFF: https://github.com/llvm/llvm-project/commit/25a41ad242000520629a274e83db1ea884d1c1e7.diff

LOG: [PowerPC] Emit scalar fp min/max instructions

VSX provides floating point minimum and maximum instructions that conform
to IEEE semantics. This legalizes the corresponding FMINNUM_IEEE and
FMAXNUM_IEEE nodes for f32 and f64 and emits VSX code (xsmindp/xsmaxdp) for
them. Furthermore, on Power9 cores we have xsmaxcdp and xsmincdp
instructions that conform to language semantics for the conditional operator
even in the presence of NaNs.

Differential revision: https://reviews.llvm.org/D62993

Added: 
    llvm/test/CodeGen/PowerPC/scalar-min-max.ll

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCISelLowering.h
    llvm/lib/Target/PowerPC/PPCInstrInfo.td
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/ctr-minmaxnum.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 075e3c8e94d9..a2b45be72c45 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -548,6 +548,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
   }
 
+  if (Subtarget.hasVSX()) {
+    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
+    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
+    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
+    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
+  }
+
   if (Subtarget.hasAltivec()) {
     // First set operation action for all vector types to expand. Then we
     // will selectively turn on ones that can be effectively codegen'd.
@@ -1294,6 +1301,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((PPCISD::NodeType)Opcode) {
   case PPCISD::FIRST_NUMBER:    break;
   case PPCISD::FSEL:            return "PPCISD::FSEL";
+  case PPCISD::XSMAXCDP:        return "PPCISD::XSMAXCDP";
+  case PPCISD::XSMINCDP:        return "PPCISD::XSMINCDP";
   case PPCISD::FCFID:           return "PPCISD::FCFID";
   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
@@ -7214,17 +7223,15 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
       !Op.getOperand(2).getValueType().isFloatingPoint())
     return Op;
 
+  bool HasNoInfs = DAG.getTarget().Options.NoInfsFPMath;
+  bool HasNoNaNs = DAG.getTarget().Options.NoNaNsFPMath;
   // We might be able to do better than this under some circumstances, but in
   // general, fsel-based lowering of select is a finite-math-only optimization.
   // For more information, see section F.3 of the 2.06 ISA specification.
-  if (!DAG.getTarget().Options.NoInfsFPMath ||
-      !DAG.getTarget().Options.NoNaNsFPMath)
+  // With ISA 3.0, we have xsmaxcdp/xsmincdp which are OK to emit even in the
+  // presence of infinities and NaNs.
+  if (!Subtarget.hasP9Vector() && (!HasNoInfs || !HasNoNaNs))
     return Op;
-  // TODO: Propagate flags from the select rather than global settings.
-  SDNodeFlags Flags;
-  Flags.setNoInfs(true);
-  Flags.setNoNaNs(true);
-
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
 
   EVT ResVT = Op.getValueType();
@@ -7233,6 +7240,27 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
   SDLoc dl(Op);
 
+  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
+    switch (CC) {
+    default:
+      // Not a min/max but with finite math, we may still be able to use fsel.
+      if (HasNoInfs && HasNoNaNs)
+        break;
+      return Op;
+    case ISD::SETOGT:
+    case ISD::SETGT:
+      return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
+    case ISD::SETOLT:
+    case ISD::SETLT:
+      return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
+    }
+  }
+
+  // TODO: Propagate flags from the select rather than global settings.
+  SDNodeFlags Flags;
+  Flags.setNoInfs(true);
+  Flags.setNoNaNs(true);
+
   // If the RHS of the comparison is a 0.0, we don't need to do the
   // subtraction at all.
   SDValue Sel1;

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 62922ea2d4c4..7016a6014904 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -51,6 +51,9 @@ namespace llvm {
       ///
       FSEL,
 
+      /// XSMAXCDP, XSMINCDP - C-type min/max instructions.
+      XSMAXCDP, XSMINCDP,
+
       /// FCFID - The FCFID instruction, taking an f64 operand and producing
       /// and f64 value containing the FP representation of the integer that
       /// was temporarily in the f64 operand.

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 24183277519b..7c0b54c42bf9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -117,6 +117,10 @@ def SDT_PPCextswsli : SDTypeProfile<1, 2, [  // extswsli
   SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisInt<2>
 ]>;
 
+def SDT_PPCFPMinMax : SDTypeProfile<1, 2, [
+  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>
+]>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC specific DAG Nodes.
 //
@@ -165,7 +169,8 @@ def PPCfsel   : SDNode<"PPCISD::FSEL",
    // Type constraint for fsel.
    SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, 
                         SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
-
+def PPCxsmaxc : SDNode<"PPCISD::XSMAXCDP", SDT_PPCFPMinMax, []>;
+def PPCxsminc : SDNode<"PPCISD::XSMINCDP", SDT_PPCFPMinMax, []>;
 def PPChi       : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
 def PPClo       : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
 def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp,

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 2aad5860d87f..72e5fb756fcb 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1255,6 +1255,55 @@ def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
 } // AddedComplexity
 } // HasVSX
 
+def FpMinMax {
+  dag F32Min = (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC),
+                                          (COPY_TO_REGCLASS $B, VSFRC)),
+                                 VSSRC);
+  dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC),
+                                          (COPY_TO_REGCLASS $B, VSFRC)),
+                                 VSSRC);
+}
+
+let AddedComplexity = 400, Predicates = [HasVSX] in {
+  // f32 Min.
+  def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
+            (f32 FpMinMax.F32Min)>;
+  def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
+            (f32 FpMinMax.F32Min)>;
+  def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))),
+            (f32 FpMinMax.F32Min)>;
+  def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
+            (f32 FpMinMax.F32Min)>;
+  // f32 Max.
+  def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
+            (f32 FpMinMax.F32Max)>;
+  def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
+            (f32 FpMinMax.F32Max)>;
+  def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
+            (f32 FpMinMax.F32Max)>;
+  def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
+            (f32 FpMinMax.F32Max)>;
+
+  // f64 Min.
+  def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
+            (f64 (XSMINDP $A, $B))>;
+  def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
+            (f64 (XSMINDP $A, $B))>;
+  def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
+            (f64 (XSMINDP $A, $B))>;
+  def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
+            (f64 (XSMINDP $A, $B))>;
+  // f64 Max.
+  def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
+            (f64 (XSMAXDP $A, $B))>;
+  def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
+            (f64 (XSMAXDP $A, $B))>;
+  def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))),
+            (f64 (XSMAXDP $A, $B))>;
+  def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
+            (f64 (XSMAXDP $A, $B))>;
+}
+
 def ScalarLoads {
   dag Li8 =       (i32 (extloadi8 xoaddr:$src));
   dag ZELi8 =     (i32 (zextloadi8 xoaddr:$src));
@@ -2884,13 +2933,14 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   //===--------------------------------------------------------------------===//
 
   // Maximum/Minimum Type-C/Type-J DP
-  // XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU, so we use vsrc for XT
-  def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsrc, vsfrc, vsfrc,
-                                 IIC_VecFP, []>;
+  def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc,
+                                 IIC_VecFP,
+                                 [(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>;
   def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc,
                                  IIC_VecFP, []>;
-  def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsrc, vsfrc, vsfrc,
-                                 IIC_VecFP, []>;
+  def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsfrc, vsfrc, vsfrc,
+                                 IIC_VecFP,
+                                 [(set f64:$XT, (PPCxsminc f64:$XA, f64:$XB))]>;
   def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc,
                                  IIC_VecFP, []>;
 
@@ -3697,6 +3747,15 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
   def : Pat<(f128 (fpextend f32:$src)),
             (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>;
 
+  def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)),
+            (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC),
+                                             (COPY_TO_REGCLASS $XB, VSSRC)),
+                                   VSSRC))>;
+  def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)),
+            (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC),
+                                             (COPY_TO_REGCLASS $XB, VSSRC)),
+                                   VSSRC))>;
+
 } // end HasP9Vector, AddedComplexity
 
 let AddedComplexity = 400 in {

diff  --git a/llvm/test/CodeGen/PowerPC/ctr-minmaxnum.ll b/llvm/test/CodeGen/PowerPC/ctr-minmaxnum.ll
index a160074ba4f2..ed3c9f07c1a8 100644
--- a/llvm/test/CodeGen/PowerPC/ctr-minmaxnum.ll
+++ b/llvm/test/CodeGen/PowerPC/ctr-minmaxnum.ll
@@ -36,8 +36,8 @@ loop_exit:
 
 ; CHECK-LABEL: test1:
 ; CHECK-NOT: mtctr
-; CHECK: bl fminf
-; CHECK-NOT: bl fminf
+; CHECK: xsmindp
+; CHECK-NOT: xsmindp
 ; CHECK-NOT: mtctr
 ; CHECK: blr
 
@@ -59,9 +59,9 @@ loop_exit:
 
 ; CHECK-LABEL: test1v:
 ; CHECK: xvminsp
-; CHECK-NOT: bl fminf
+; CHECK-NOT: xsmindp
 ; CHECK: mtctr
-; CHECK-NOT: bl fminf
+; CHECK-NOT: xsmindp
 ; CHECK: blr
 
 ; QPX-LABEL: test1v:
@@ -87,8 +87,8 @@ loop_exit:
 
 ; CHECK-LABEL: test1a:
 ; CHECK-NOT: mtctr
-; CHECK: bl fminf
-; CHECK-NOT: bl fminf
+; CHECK: xsmindp
+; CHECK-NOT: xsmindp
 ; CHECK-NOT: mtctr
 ; CHECK: blr
 
@@ -110,8 +110,8 @@ loop_exit:
 
 ; CHECK-LABEL: test2:
 ; CHECK-NOT: mtctr
-; CHECK: bl fmaxf
-; CHECK-NOT: bl fmaxf
+; CHECK: xsmaxdp
+; CHECK-NOT: xsmaxdp
 ; CHECK-NOT: mtctr
 ; CHECK: blr
 
@@ -134,9 +134,9 @@ loop_exit:
 ; CHECK-LABEL: test2v:
 ; CHECK: xvmaxdp
 ; CHECK: xvmaxdp
-; CHECK-NOT: bl fmax
+; CHECK-NOT: xsmaxdp
 ; CHECK: mtctr
-; CHECK-NOT: bl fmax
+; CHECK-NOT: xsmaxdp
 ; CHECK: blr
 
 ; QPX-LABEL: test2v:
@@ -162,8 +162,8 @@ loop_exit:
 
 ; CHECK-LABEL: test2a:
 ; CHECK-NOT: mtctr
-; CHECK: bl fmaxf
-; CHECK-NOT: bl fmaxf
+; CHECK: xsmaxdp
+; CHECK-NOT: xsmaxdp
 ; CHECK-NOT: mtctr
 ; CHECK: blr
 
@@ -185,8 +185,8 @@ loop_exit:
 
 ; CHECK-LABEL: test3:
 ; CHECK-NOT: mtctr
-; CHECK: bl fmin
-; CHECK-NOT: bl fmin
+; CHECK: xsmindp
+; CHECK-NOT: xsmindp
 ; CHECK-NOT: mtctr
 ; CHECK: blr
 
@@ -208,8 +208,8 @@ loop_exit:
 
 ; CHECK-LABEL: test3a:
 ; CHECK-NOT: mtctr
-; CHECK: bl fmin
-; CHECK-NOT: bl fmin
+; CHECK: xsmindp
+; CHECK-NOT: xsmindp
 ; CHECK-NOT: mtctr
 ; CHECK: blr
 
@@ -231,8 +231,8 @@ loop_exit:
 
 ; CHECK-LABEL: test4:
 ; CHECK-NOT: mtctr
-; CHECK: bl fmax
-; CHECK-NOT: bl fmax
+; CHECK: xsmaxdp
+; CHECK-NOT: xsmaxdp
 ; CHECK-NOT: mtctr
 ; CHECK: blr
 
@@ -254,8 +254,8 @@ loop_exit:
 
 ; CHECK-LABEL: test4a:
 ; CHECK-NOT: mtctr
-; CHECK: bl fmax
-; CHECK-NOT: bl fmax
+; CHECK: xsmaxdp
+; CHECK-NOT: xsmaxdp
 ; CHECK-NOT: mtctr
 ; CHECK: blr
 

diff  --git a/llvm/test/CodeGen/PowerPC/scalar-min-max.ll b/llvm/test/CodeGen/PowerPC/scalar-min-max.ll
new file mode 100644
index 000000000000..a7e95228b74d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/scalar-min-max.ll
@@ -0,0 +1,203 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names --enable-unsafe-fp-math \
+; RUN:   -verify-machineinstrs --enable-no-signed-zeros-fp-math \
+; RUN:   --enable-no-nans-fp-math \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names --enable-unsafe-fp-math \
+; RUN:   -verify-machineinstrs --enable-no-signed-zeros-fp-math \
+; RUN:   --enable-no-nans-fp-math \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
+; RUN:   --check-prefix=NO-FAST-P9
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
+; RUN:   --check-prefix=NO-FAST-P8
+define dso_local float @testfmax(float %a, float %b) local_unnamed_addr {
+; CHECK-LABEL: testfmax:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmaxdp f1, f1, f2
+; CHECK-NEXT:    blr
+;
+; NO-FAST-P9-LABEL: testfmax:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmaxcdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testfmax:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
+; NO-FAST-P8-NEXT:    bgtlr cr0
+; NO-FAST-P8-NEXT:  # %bb.1: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %cmp = fcmp ogt float %a, %b
+  %cond = select i1 %cmp, float %a, float %b
+  ret float %cond
+}
+
+define dso_local double @testdmax(double %a, double %b) local_unnamed_addr {
+; CHECK-LABEL: testdmax:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmaxdp f1, f1, f2
+; CHECK-NEXT:    blr
+;
+; NO-FAST-P9-LABEL: testdmax:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmaxcdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testdmax:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xscmpudp cr0, f1, f2
+; NO-FAST-P8-NEXT:    bgtlr cr0
+; NO-FAST-P8-NEXT:  # %bb.1: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %cmp = fcmp ogt double %a, %b
+  %cond = select i1 %cmp, double %a, double %b
+  ret double %cond
+}
+
+define dso_local float @testfmin(float %a, float %b) local_unnamed_addr {
+; CHECK-LABEL: testfmin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmindp f1, f1, f2
+; CHECK-NEXT:    blr
+;
+; NO-FAST-P9-LABEL: testfmin:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmincdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testfmin:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
+; NO-FAST-P8-NEXT:    bltlr cr0
+; NO-FAST-P8-NEXT:  # %bb.1: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %cmp = fcmp olt float %a, %b
+  %cond = select i1 %cmp, float %a, float %b
+  ret float %cond
+}
+
+define dso_local double @testdmin(double %a, double %b) local_unnamed_addr {
+; CHECK-LABEL: testdmin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmindp f1, f1, f2
+; CHECK-NEXT:    blr
+;
+; NO-FAST-P9-LABEL: testdmin:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmincdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testdmin:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xscmpudp cr0, f1, f2
+; NO-FAST-P8-NEXT:    bltlr cr0
+; NO-FAST-P8-NEXT:  # %bb.1: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %cmp = fcmp olt double %a, %b
+  %cond = select i1 %cmp, double %a, double %b
+  ret double %cond
+}
+
+define dso_local float @testfmax_fast(float %a, float %b) local_unnamed_addr {
+; CHECK-LABEL: testfmax_fast:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmaxdp f1, f1, f2
+; CHECK-NEXT:    blr
+;
+; NO-FAST-P9-LABEL: testfmax_fast:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmaxcdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testfmax_fast:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
+; NO-FAST-P8-NEXT:    bgtlr cr0
+; NO-FAST-P8-NEXT:  # %bb.1: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %cmp = fcmp fast ogt float %a, %b
+  %cond = select i1 %cmp, float %a, float %b
+  ret float %cond
+}
+define dso_local double @testdmax_fast(double %a, double %b) local_unnamed_addr {
+; CHECK-LABEL: testdmax_fast:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmaxdp f1, f1, f2
+; CHECK-NEXT:    blr
+;
+; NO-FAST-P9-LABEL: testdmax_fast:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmaxcdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testdmax_fast:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xscmpudp cr0, f1, f2
+; NO-FAST-P8-NEXT:    bgtlr cr0
+; NO-FAST-P8-NEXT:  # %bb.1: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %cmp = fcmp fast ogt double %a, %b
+  %cond = select i1 %cmp, double %a, double %b
+  ret double %cond
+}
+define dso_local float @testfmin_fast(float %a, float %b) local_unnamed_addr {
+; CHECK-LABEL: testfmin_fast:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmindp f1, f1, f2
+; CHECK-NEXT:    blr
+;
+; NO-FAST-P9-LABEL: testfmin_fast:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmincdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testfmin_fast:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    fcmpu cr0, f1, f2
+; NO-FAST-P8-NEXT:    bltlr cr0
+; NO-FAST-P8-NEXT:  # %bb.1: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %cmp = fcmp fast olt float %a, %b
+  %cond = select i1 %cmp, float %a, float %b
+  ret float %cond
+}
+define dso_local double @testdmin_fast(double %a, double %b) local_unnamed_addr {
+; CHECK-LABEL: testdmin_fast:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmindp f1, f1, f2
+; CHECK-NEXT:    blr
+;
+; NO-FAST-P9-LABEL: testdmin_fast:
+; NO-FAST-P9:       # %bb.0: # %entry
+; NO-FAST-P9-NEXT:    xsmincdp f1, f1, f2
+; NO-FAST-P9-NEXT:    blr
+;
+; NO-FAST-P8-LABEL: testdmin_fast:
+; NO-FAST-P8:       # %bb.0: # %entry
+; NO-FAST-P8-NEXT:    xscmpudp cr0, f1, f2
+; NO-FAST-P8-NEXT:    bltlr cr0
+; NO-FAST-P8-NEXT:  # %bb.1: # %entry
+; NO-FAST-P8-NEXT:    fmr f1, f2
+; NO-FAST-P8-NEXT:    blr
+entry:
+  %cmp = fcmp fast olt double %a, %b
+  %cond = select i1 %cmp, double %a, double %b
+  ret double %cond
+}


        


More information about the llvm-commits mailing list