[llvm] r219445 - Improve sqrt estimate algorithm (fast-math)
Sanjay Patel
spatel at rotateright.com
Thu Oct 9 14:26:36 PDT 2014
Author: spatel
Date: Thu Oct 9 16:26:35 2014
New Revision: 219445
URL: http://llvm.org/viewvc/llvm-project?rev=219445&view=rev
Log:
Improve sqrt estimate algorithm (fast-math)
This patch changes the fast-math implementation for calculating sqrt(x) from:
y = 1 / (1 / sqrt(x))
to:
y = x * (1 / sqrt(x))
This has two benefits: less (and faster) code, and one fewer estimate instruction
that may lose precision.
The only target that will be affected (until http://reviews.llvm.org/D5658 is approved)
is PPC. The difference in codegen for PPC is 2 fewer flops for a single-precision sqrtf
or vector sqrtf and 4 fewer flops for a double-precision sqrt.
We also eliminate a constant load and extra register usage.
Differential Revision: http://reviews.llvm.org/D5682
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/PowerPC/recipest.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=219445&r1=219444&r2=219445&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Oct 9 16:26:35 2014
@@ -7088,26 +7088,25 @@ SDValue DAGCombiner::visitFREM(SDNode *N
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
if (DAG.getTarget().Options.UnsafeFPMath) {
- // Compute this as 1/(1/sqrt(X)): the reciprocal of the reciprocal sqrt.
+ // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
AddToWorklist(RV.getNode());
- RV = BuildReciprocalEstimate(RV);
- if (RV.getNode()) {
- // Unfortunately, RV is now NaN if the input was exactly 0.
- // Select out this case and force the answer to 0.
- EVT VT = RV.getValueType();
-
- SDValue Zero = DAG.getConstantFP(0.0, VT);
- SDValue ZeroCmp =
- DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
- N->getOperand(0), Zero, ISD::SETEQ);
- AddToWorklist(ZeroCmp.getNode());
- AddToWorklist(RV.getNode());
+ EVT VT = RV.getValueType();
+ RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);
+ AddToWorklist(RV.getNode());
+
+ // Unfortunately, RV is now NaN if the input was exactly 0.
+ // Select out this case and force the answer to 0.
+ SDValue Zero = DAG.getConstantFP(0.0, VT);
+ SDValue ZeroCmp =
+ DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT),
+ N->getOperand(0), Zero, ISD::SETEQ);
+ AddToWorklist(ZeroCmp.getNode());
+ AddToWorklist(RV.getNode());
- RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
- SDLoc(N), VT, ZeroCmp, Zero, RV);
- return RV;
- }
+ RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT,
+ SDLoc(N), VT, ZeroCmp, Zero, RV);
+ return RV;
}
}
return SDValue();
Modified: llvm/trunk/test/CodeGen/PowerPC/recipest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/recipest.ll?rev=219445&r1=219444&r2=219445&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/recipest.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/recipest.ll Thu Oct 9 16:26:35 2014
@@ -197,11 +197,7 @@ define double @foo3(double %a) nounwind
; CHECK-NEXT: fmul
; CHECK-NEXT: fmadd
; CHECK-NEXT: fmul
-; CHECK-NEXT: fre
-; CHECK-NEXT: fnmsub
-; CHECK-NEXT: fmadd
-; CHECK-NEXT: fnmsub
-; CHECK-NEXT: fmadd
+; CHECK-NEXT: fmul
; CHECK: blr
; CHECK-SAFE: @foo3
@@ -220,9 +216,7 @@ define float @goo3(float %a) nounwind {
; CHECK: fmuls
; CHECK-NEXT: fmadds
; CHECK-NEXT: fmuls
-; CHECK-NEXT: fres
-; CHECK-NEXT: fnmsubs
-; CHECK-NEXT: fmadds
+; CHECK-NEXT: fmuls
; CHECK: blr
; CHECK-SAFE: @goo3
@@ -236,7 +230,6 @@ define <4 x float> @hoo3(<4 x float> %a)
; CHECK: @hoo3
; CHECK: vrsqrtefp
-; CHECK-DAG: vrefp
; CHECK-DAG: vcmpeqfp
; CHECK-SAFE: @hoo3
More information about the llvm-commits
mailing list