[llvm] r218698 - Split the estimate() interface into separate functions for each type. NFC.

Tue Sep 30 13:28:48 PDT 2014

Author: spatel
Date: Tue Sep 30 15:28:48 2014
New Revision: 218698

URL: http://llvm.org/viewvc/llvm-project?rev=218698&view=rev
Log:
Split the estimate() interface into separate functions for each type. NFC.

It was hacky to use an opcode as a switch because it won't always match
(rsqrte != sqrte), and it looks like we'll need to add more special casing
per arch than I had hoped for. E.g., x86 will prefer a different NR estimate
implementation. ARM will want to use its 'step' instructions. There also
don't appear to be any new estimate instructions in any arch in a long,
long time. Altivec vloge and vexpte may have been the first and last in
that field...


Modified:
    llvm/trunk/include/llvm/Target/TargetLowering.h
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h

Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=218698&r1=218697&r2=218698&view=diff
==============================================================================

--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Tue Sep 30 15:28:48 2014
@@ -2624,21 +2624,37 @@ public:
     return SDValue();
   }
 
-  /// Hooks for building estimates in place of, for example, slower divisions
-  /// and square roots. These are not builder functions themselves, just the
-  /// target-specific variables needed for building the estimate algorithm.
-
-  /// Return an estimate value for the input opcode and input operand.
-  /// The RefinementSteps output is the number of refinement iterations
-  /// required to generate a sufficient (though not necessarily IEEE-754
-  /// compliant) estimate for the value type.
+  /// Hooks for building estimates in place of slower divisions and square
+  /// roots.
+  
+  /// Return a reciprocal square root estimate value for the input operand.
+  /// The RefinementSteps output is the number of Newton-Raphson refinement
+  /// iterations required to generate a sufficient (though not necessarily
+  /// IEEE-754 compliant) estimate for the value type.
+  /// A target may choose to implement its own refinement within this function.
+  /// If that's true, then return '0' as the number of RefinementSteps to avoid
+  /// any further refinement of the estimate.
   /// An empty SDValue return means no estimate sequence can be created.
-  virtual SDValue getEstimate(unsigned Opcode, SDValue Operand,
+  virtual SDValue getRsqrtEstimate(SDValue Operand,
                               DAGCombinerInfo &DCI,
                               unsigned &RefinementSteps) const {
     return SDValue();
   }
-  
+
+  /// Return a reciprocal estimate value for the input operand.
+  /// The RefinementSteps output is the number of Newton-Raphson refinement
+  /// iterations required to generate a sufficient (though not necessarily
+  /// IEEE-754 compliant) estimate for the value type.
+  /// A target may choose to implement its own refinement within this function.
+  /// If that's true, then return '0' as the number of RefinementSteps to avoid
+  /// any further refinement of the estimate.
+  /// An empty SDValue return means no estimate sequence can be created.
+  virtual SDValue getRecipEstimate(SDValue Operand,
+                                   DAGCombinerInfo &DCI,
+                                   unsigned &RefinementSteps) const {
+    return SDValue();
+  }
+
   //===--------------------------------------------------------------------===//
   // Legalization utility functions
   //

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=218698&r1=218697&r2=218698&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Sep 30 15:28:48 2014
@@ -11779,7 +11779,7 @@ SDValue DAGCombiner::BuildReciprocalEsti
   TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
 
   unsigned Iterations;
-  if (SDValue Est = TLI.getEstimate(ISD::FDIV, Op, DCI, Iterations)) {
+  if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
     // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
     // For the reciprocal, we need to find the zero of the function:
     //   F(X) = A X - 1 [which has a zero at X = 1/A]
@@ -11820,7 +11820,7 @@ SDValue DAGCombiner::BuildRsqrtEstimate(
   // Expose the DAG combiner to the target combiner implementations.
   TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
   unsigned Iterations;
-  if (SDValue Est = TLI.getEstimate(ISD::FSQRT, Op, DCI, Iterations)) {
+  if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations)) {
     // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
     // For the reciprocal sqrt, we need to find the zero of the function:
     //   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=218698&r1=218697&r2=218698&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue Sep 30 15:28:48 2014
@@ -7458,25 +7458,34 @@ PPCTargetLowering::EmitInstrWithCustomIn
 // Target Optimization Hooks
 //===----------------------------------------------------------------------===//
 
-SDValue PPCTargetLowering::getEstimate(unsigned Opcode, SDValue Operand,
-                                       DAGCombinerInfo &DCI,
-                                       unsigned &RefinementSteps) const {
+SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
+                                            DAGCombinerInfo &DCI,
+                                            unsigned &RefinementSteps) const {
   EVT VT = Operand.getValueType();
-  SDValue RV;
-  if (Opcode == ISD::FSQRT) {
-    if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
-        (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
-        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-        (VT == MVT::v2f64 && Subtarget.hasVSX()))
-      RV = DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
-  } else if (Opcode == ISD::FDIV) {
-    if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
-        (VT == MVT::f64 && Subtarget.hasFRE())  ||
-        (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
-        (VT == MVT::v2f64 && Subtarget.hasVSX()))
-      RV = DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
+  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+      (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
+      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+    // Convergence is quadratic, so we essentially double the number of digits
+    // correct after every iteration. For both FRE and FRSQRTE, the minimum
+    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
+    // 2^-14. IEEE float has 23 digits and double has 52 digits.
+    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
+    if (VT.getScalarType() == MVT::f64)
+      ++RefinementSteps;
+    return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
   }
-  if (RV.getNode()) {
+  return SDValue();
+}
+
+SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
+                                            DAGCombinerInfo &DCI,
+                                            unsigned &RefinementSteps) const {
+  EVT VT = Operand.getValueType();
+  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+      (VT == MVT::f64 && Subtarget.hasFRE())  ||
+      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+      (VT == MVT::v2f64 && Subtarget.hasVSX())) {
     // Convergence is quadratic, so we essentially double the number of digits
     // correct after every iteration. For both FRE and FRSQRTE, the minimum
     // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
@@ -7484,8 +7493,9 @@ SDValue PPCTargetLowering::getEstimate(u
     RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
     if (VT.getScalarType() == MVT::f64)
       ++RefinementSteps;
+    return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
   }
-  return RV;
+  return SDValue();
 }
 
 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=218698&r1=218697&r2=218698&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Tue Sep 30 15:28:48 2014
@@ -701,9 +701,10 @@ namespace llvm {
     SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
 
-    SDValue getEstimate(unsigned Opcode, SDValue Operand,
-                        DAGCombinerInfo &DCI,
-                        unsigned &RefinementSteps) const override;
+    SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps) const override;
+    SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps) const override;
 
     CCAssignFn *useFastISelCCs(unsigned Flag) const;
   };