[llvm] r286523 - [DAG Combiner] Fix the native computation of the Newton series for reciprocals
Evandro Menezes via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 10 15:31:06 PST 2016
Author: evandro
Date: Thu Nov 10 17:31:06 2016
New Revision: 286523
URL: http://llvm.org/viewvc/llvm-project?rev=286523&view=rev
Log:
[DAG Combiner] Fix the native computation of the Newton series for reciprocals
The generic infrastructure to compute the Newton series for reciprocal and
reciprocal square root was conceived to allow a target to compute the series
itself. However, the original code did not properly consider this condition
if returned by a target. This patch addresses the issues to allow a target
to compute the series on its own.
Differential revision: https://reviews.llvm.org/D22975
Modified:
llvm/trunk/include/llvm/Target/TargetLowering.h
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=286523&r1=286522&r2=286523&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Thu Nov 10 17:31:06 2016
@@ -2986,21 +2986,24 @@ public:
/// Hooks for building estimates in place of slower divisions and square
/// roots.
- /// Return a reciprocal square root estimate value for the input operand.
+ /// Return either a square root or its reciprocal estimate value for the input
+ /// operand.
/// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
/// 'Enabled' as set by a potential default override attribute.
/// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
/// refinement iterations required to generate a sufficient (though not
/// necessarily IEEE-754 compliant) estimate is returned in that parameter.
/// The boolean UseOneConstNR output is used to select a Newton-Raphson
- /// algorithm implementation that uses one constant or two constants.
+ /// algorithm implementation that uses either one or two constants.
+ /// The boolean Reciprocal is used to select whether the estimate is for the
+ /// square root of the input operand or the reciprocal of its square root.
/// A target may choose to implement its own refinement within this function.
/// If that's true, then return '0' as the number of RefinementSteps to avoid
/// any further refinement of the estimate.
/// An empty SDValue return means no estimate sequence can be created.
- virtual SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG,
- int Enabled, int &RefinementSteps,
- bool &UseOneConstNR) const {
+ virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled, int &RefinementSteps,
+ bool &UseOneConstNR, bool Reciprocal) const {
return SDValue();
}
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=286523&r1=286522&r2=286523&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Nov 10 17:31:06 2016
@@ -14928,6 +14928,12 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N
return S;
}
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal, we need to find the zero of the function:
+/// F(X) = A X - 1 [which has a zero at X = 1/A]
+/// =>
+/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+/// does not require additional intermediate precision]
SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -14947,19 +14953,13 @@ SDValue DAGCombiner::BuildReciprocalEsti
// refinement steps.
int Iterations = TLI.getDivRefinementSteps(VT, MF);
if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
+ AddToWorklist(Est.getNode());
+
if (Iterations) {
- // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
- // For the reciprocal, we need to find the zero of the function:
- // F(X) = A X - 1 [which has a zero at X = 1/A]
- // =>
- // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
- // does not require additional intermediate precision]
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
- AddToWorklist(Est.getNode());
-
// Newton iterations: Est = Est + Est (1 - Arg * Est)
for (int i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
@@ -15100,12 +15100,30 @@ SDValue DAGCombiner::buildSqrtEstimateIm
bool UseOneConstNR = false;
if (SDValue Est =
- TLI.getRsqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR)) {
+ TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
+ Reciprocal)) {
AddToWorklist(Est.getNode());
+
if (Iterations) {
Est = UseOneConstNR
- ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
- : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+ ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
+ : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+
+ if (!Reciprocal) {
+ // Unfortunately, Est is now NaN if the input was exactly 0.0.
+ // Select out this case and force the answer to 0.0.
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ EVT CCVT = getSetCCResultType(VT);
+ SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+ AddToWorklist(ZeroCmp.getNode());
+
+ Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
+ ZeroCmp, FPZero, Est);
+ AddToWorklist(Est.getNode());
+ }
}
return Est;
}
@@ -15118,23 +15136,7 @@ SDValue DAGCombiner::buildRsqrtEstimate(
}
SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
- SDValue Est = buildSqrtEstimateImpl(Op, Flags, false);
- if (!Est)
- return SDValue();
-
- // Unfortunately, Est is now NaN if the input was exactly 0.
- // Select out this case and force the answer to 0.
- EVT VT = Est.getValueType();
- SDLoc DL(Op);
- SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
- EVT CCVT = getSetCCResultType(VT);
- SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ);
- AddToWorklist(ZeroCmp.getNode());
-
- Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp,
- Zero, Est);
- AddToWorklist(Est.getNode());
- return Est;
+ return buildSqrtEstimateImpl(Op, Flags, false);
}
/// Return true if base is a frame index, which is known not to alias with
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=286523&r1=286522&r2=286523&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Thu Nov 10 17:31:06 2016
@@ -4644,10 +4644,11 @@ static SDValue getEstimate(const AArch64
return SDValue();
}
-SDValue AArch64TargetLowering::getRsqrtEstimate(SDValue Operand,
- SelectionDAG &DAG, int Enabled,
- int &ExtraSteps,
- bool &UseOneConst) const {
+SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
+ SelectionDAG &DAG, int Enabled,
+ int &ExtraSteps,
+ bool &UseOneConst,
+ bool Reciprocal) const {
if (Enabled == ReciprocalEstimate::Enabled ||
(Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h?rev=286523&r1=286522&r2=286523&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h Thu Nov 10 17:31:06 2016
@@ -534,8 +534,9 @@ private:
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
std::vector<SDNode *> *Created) const override;
- SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
- int &ExtraSteps, bool &UseOneConst) const override;
+ SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &ExtraSteps, bool &UseOneConst,
+ bool Reciprocal) const override;
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &ExtraSteps) const override;
unsigned combineRepeatedFPDivisors() const override;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=286523&r1=286522&r2=286523&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Thu Nov 10 17:31:06 2016
@@ -2978,10 +2978,11 @@ const char* AMDGPUTargetLowering::getTar
return nullptr;
}
-SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
- SelectionDAG &DAG, int Enabled,
- int &RefinementSteps,
- bool &UseOneConstNR) const {
+SDValue AMDGPUTargetLowering::getSqrtEstimate(SDValue Operand,
+ SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps,
+ bool &UseOneConstNR,
+ bool Reciprocal) const {
EVT VT = Operand.getValueType();
if (VT == MVT::f32) {
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=286523&r1=286522&r2=286523&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Thu Nov 10 17:31:06 2016
@@ -172,9 +172,9 @@ public:
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
return true;
}
- SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
- int &RefinementSteps,
- bool &UseOneConstNR) const override;
+ SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps, bool &UseOneConstNR,
+ bool Reciprocal) const override;
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps) const override;
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=286523&r1=286522&r2=286523&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Thu Nov 10 17:31:06 2016
@@ -9637,9 +9637,10 @@ static int getEstimateRefinementSteps(EV
return RefinementSteps;
}
-SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG,
- int Enabled, int &RefinementSteps,
- bool &UseOneConstNR) const {
+SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled, int &RefinementSteps,
+ bool &UseOneConstNR,
+ bool Reciprocal) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
(VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=286523&r1=286522&r2=286523&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Thu Nov 10 17:31:06 2016
@@ -968,9 +968,9 @@ namespace llvm {
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
- int &RefinementSteps,
- bool &UseOneConstNR) const override;
+ SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps, bool &UseOneConstNR,
+ bool Reciprocal) const override;
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps) const override;
unsigned combineRepeatedFPDivisors() const override;
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=286523&r1=286522&r2=286523&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Nov 10 17:31:06 2016
@@ -15296,10 +15296,11 @@ bool X86TargetLowering::isFsqrtCheap(SDV
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
-SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
- SelectionDAG &DAG, int Enabled,
- int &RefinementSteps,
- bool &UseOneConstNR) const {
+SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
+ SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps,
+ bool &UseOneConstNR,
+ bool Reciprocal) const {
EVT VT = Op.getValueType();
// SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=286523&r1=286522&r2=286523&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Thu Nov 10 17:31:06 2016
@@ -1268,9 +1268,9 @@ namespace llvm {
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
/// Use rsqrt* to speed up sqrt calculations.
- SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
- int &RefinementSteps,
- bool &UseOneConstNR) const override;
+ SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps, bool &UseOneConstNR,
+ bool Reciprocal) const override;
/// Use rcp* to speed up fdiv calculations.
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
More information about the llvm-commits
mailing list