[PATCH] D66050: Improve division estimation of floating points.
Chaofan Qiu via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 10 03:41:02 PDT 2019
qiucf created this revision.
qiucf added a reviewer: hfinkel.
qiucf added a project: LLVM.
Herald added subscribers: llvm-commits, hiraditya.
The current implementation of _fast_ floating-point division (`A/B`) is to:
1. Get an initial estimate of the reciprocal of B
2. Use Newton's iteration method to refine the reciprocal estimate
3. Multiply the refined estimate by A
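Compared with GCC, this loses some precision because the multiplication by A is done only after all iterations have finished. If we instead multiply by A before the last iteration, so that the final Newton step refines the quotient estimate rather than the reciprocal estimate, the result will be more accurate.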
Repository:
rL LLVM
https://reviews.llvm.org/D66050
Files:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -529,7 +529,7 @@
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
- SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
+ SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
@@ -12435,10 +12435,8 @@
}
// Fold into a reciprocal estimate and multiply instead of a real divide.
- if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
- AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
- }
+ if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
+ return RV;
}
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
@@ -20024,7 +20022,9 @@
/// =>
/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
/// does not require additional intermediate precision]
-SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
+/// For the last iteration, put numerator N into it to gain more precision:
+/// Result = N X_i + X_i (N - N A X_i)
+SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -20042,28 +20042,43 @@
// Estimates may be explicitly enabled for this type with a custom number of
// refinement steps.
int Iterations = TLI.getDivRefinementSteps(VT, MF);
+
if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
AddToWorklist(Est.getNode());
+ SDLoc DL(Op);
+
if (Iterations) {
- SDLoc DL(Op);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
- // Newton iterations: Est = Est + Est (1 - Arg * Est)
+ // Newton iterations: Est = Est + Est (N - Arg * Est)
+ // If this is the last time of iteration, try taking numerator into consideration.
for (int i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
+ SDValue MulEst = Est;
+
+ if (i == Iterations - 1) {
+ MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
+ AddToWorklist(MulEst.getNode());
+ }
+
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT, (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
+ Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
AddToWorklist(Est.getNode());
}
+ } else {
+ // If no iterations are available, multiply with N.
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
+ AddToWorklist(Est.getNode());
}
+
return Est;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D66050.214518.patch
Type: text/x-patch
Size: 3420 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190810/bc20a674/attachment.bin>
More information about the llvm-commits
mailing list