[PATCH] D62993: [PowerPC] Emit scalar min/max instructions with unsafe fp math

Thu Jun 6 20:39:38 PDT 2019

nemanjai created this revision.
nemanjai added reviewers: hfinkel, jsji, kbarton, lei, stefanp.
Herald added a subscriber: hiraditya.
Herald added a project: LLVM.

This is something I meant to do a long time ago but never got around to. The VSX scalar min/max instructions (xsmindp/xsmaxdp) should be an improvement over the compare/fsel sequence we currently emit.

The semantics of these instructions as specified in the ISA match the semantics specified in the description of the ISD::FMINNUM_IEEE/ISD::FMAXNUM_IEEE nodes.


Repository:
  rL LLVM

https://reviews.llvm.org/D62993

Files:
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/scalar-min-max.ll


Index: llvm/test/CodeGen/PowerPC/scalar-min-max.ll
===================================================================

--- /dev/null
+++ llvm/test/CodeGen/PowerPC/scalar-min-max.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names --enable-unsafe-fp-math \
+; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+define dso_local float @testfmax(float %a, float %b) local_unnamed_addr #0 {
+; CHECK-LABEL: testfmax:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmaxdp f1, f1, f2
+; CHECK-NEXT:    blr
+entry:
+  %cmp = fcmp fast ogt float %a, %b
+  %cond = select i1 %cmp, float %a, float %b
+  ret float %cond
+}
+
+define dso_local double @testdmax(double %a, double %b) local_unnamed_addr #0 {
+; CHECK-LABEL: testdmax:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmaxdp f1, f1, f2
+; CHECK-NEXT:    blr
+entry:
+  %cmp = fcmp fast ogt double %a, %b
+  %cond = select i1 %cmp, double %a, double %b
+  ret double %cond
+}
+
+define dso_local float @testfmin(float %a, float %b) local_unnamed_addr #0 {
+; CHECK-LABEL: testfmin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmindp f1, f1, f2
+; CHECK-NEXT:    blr
+entry:
+  %cmp = fcmp fast olt float %a, %b
+  %cond = select i1 %cmp, float %a, float %b
+  ret float %cond
+}
+
+define dso_local double @testdmin(double %a, double %b) local_unnamed_addr #0 {
+; CHECK-LABEL: testdmin:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xsmindp f1, f1, f2
+; CHECK-NEXT:    blr
+entry:
+  %cmp = fcmp fast olt double %a, %b
+  %cond = select i1 %cmp, double %a, double %b
+  ret double %cond
+}
+
+attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
Index: llvm/lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -971,6 +971,18 @@
 
 def : Pat<(v4i32 (vnot_ppc v4i32:$A)),
           (v4i32 (XXLNOR $A, $A))>;
+def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
+          (f32 (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC),
+                                          (COPY_TO_REGCLASS $B, VSFRC)),
+                                 VSSRC))>;
+def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
+          (f32 (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC),
+                                          (COPY_TO_REGCLASS $B, VSFRC)),
+                                 VSSRC))>;
+def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
+          (f64 (XSMINDP $A, $B))>;
+def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
+          (f64 (XSMAXDP $A, $B))>;
 let Predicates = [IsBigEndian] in {
 def : Pat<(v2f64 (scalar_to_vector f64:$A)),
           (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -547,6 +547,12 @@
     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
   }
 
+  if (TM.Options.UnsafeFPMath && Subtarget.hasVSX()) {
+    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
+    setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
+    setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
+    setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
+  }
   if (Subtarget.hasAltivec()) {
     // First set operation action for all vector types to expand. Then we
     // will selectively turn on ones that can be effectively codegen'd.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D62993.203487.patch
Type: text/x-patch
Size: 3632 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190607/01764ab2/attachment-0001.bin>