[llvm] r369662 - [DAGCombiner] Remove explicit call to AddToWorklist in sqrt and reciprocal computations
Amaury Sechet via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 22 08:35:45 PDT 2019
Author: deadalnix
Date: Thu Aug 22 08:35:45 2019
New Revision: 369662
URL: http://llvm.org/viewvc/llvm-project?rev=369662&view=rev
Log:
[DAGCombiner] Remove explicit call to AddToWorklist in sqrt and reciprocal computations
Summary: These nodes end up being processed regardless due to DAGCombiner ensuring arguments are processed. This changes the order in which nodes are processed, which fixes an issue on PowerPC.
Reviewers: craig.topper, efriedma, RKSimon, lebedev.ri, mcberg2017, stefanp, hfinkel
Subscribers: nemanjai, MaskRay, jsji, steven.zhang, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D66548
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/PowerPC/qpx-recipest.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=369662&r1=369661&r2=369662&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Aug 22 08:35:45 2019
@@ -20084,16 +20084,9 @@ SDValue DAGCombiner::BuildReciprocalEsti
// Newton iterations: Est = Est + Est (1 - Arg * Est)
for (int i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
- AddToWorklist(NewEst.getNode());
-
NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
- AddToWorklist(NewEst.getNode());
-
NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
- AddToWorklist(NewEst.getNode());
-
Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
- AddToWorklist(Est.getNode());
}
}
return Est;
@@ -20118,31 +20111,19 @@ SDValue DAGCombiner::buildSqrtNROneConst
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
- AddToWorklist(HalfArg.getNode());
-
HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
- AddToWorklist(HalfArg.getNode());
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
- AddToWorklist(NewEst.getNode());
-
NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
- AddToWorklist(NewEst.getNode());
-
NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
- AddToWorklist(NewEst.getNode());
-
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
- AddToWorklist(Est.getNode());
}
// If non-reciprocal square root is requested, multiply the result by Arg.
- if (!Reciprocal) {
+ if (!Reciprocal)
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
- AddToWorklist(Est.getNode());
- }
return Est;
}
@@ -20168,13 +20149,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst
// E = (E * -0.5) * ((A * E) * E + -3.0)
for (unsigned i = 0; i < Iterations; ++i) {
SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
- AddToWorklist(AE.getNode());
-
SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
- AddToWorklist(AEE.getNode());
-
SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
- AddToWorklist(RHS.getNode());
// When calculating a square root at the last iteration build:
// S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
@@ -20187,10 +20163,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst
// SQRT: LHS = (A * E) * -0.5
LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
}
- AddToWorklist(LHS.getNode());
Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
- AddToWorklist(Est.getNode());
}
return Est;
@@ -20247,16 +20221,11 @@ SDValue DAGCombiner::buildSqrtEstimateIm
SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
- AddToWorklist(Fabs.getNode());
- AddToWorklist(IsDenorm.getNode());
- AddToWorklist(Est.getNode());
} else {
// X == 0.0 ? 0.0 : Est
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
- AddToWorklist(IsZero.getNode());
- AddToWorklist(Est.getNode());
}
}
}
Modified: llvm/trunk/test/CodeGen/PowerPC/qpx-recipest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/qpx-recipest.ll?rev=369662&r1=369661&r2=369662&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/qpx-recipest.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/qpx-recipest.ll Thu Aug 22 08:35:45 2019
@@ -57,8 +57,6 @@ entry:
ret <4 x double> %r
}
-; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
-; an qvfmadd instead of a qvfnmsubs
define <4 x double> @foof_fmf(<4 x double> %a, <4 x float> %b) nounwind {
; CHECK-LABEL: foof_fmf:
; CHECK: # %bb.0: # %entry
@@ -66,12 +64,9 @@ define <4 x double> @foof_fmf(<4 x doubl
; CHECK-NEXT: qvfrsqrtes 3, 2
; CHECK-NEXT: addi 3, 3, .LCPI2_0@toc@l
; CHECK-NEXT: qvlfsx 0, 0, 3
-; CHECK-NEXT: addis 3, 2, .LCPI2_1@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI2_1@toc@l
-; CHECK-NEXT: qvlfsx 4, 0, 3
-; CHECK-NEXT: qvfmadds 0, 2, 0, 2
-; CHECK-NEXT: qvfmuls 2, 3, 3
-; CHECK-NEXT: qvfmadds 0, 0, 2, 4
+; CHECK-NEXT: qvfmuls 4, 3, 3
+; CHECK-NEXT: qvfnmsubs 2, 2, 0, 2
+; CHECK-NEXT: qvfmadds 0, 2, 4, 0
; CHECK-NEXT: qvfmuls 0, 3, 0
; CHECK-NEXT: qvfmul 1, 1, 0
; CHECK-NEXT: blr
@@ -179,8 +174,6 @@ entry:
ret <4 x float> %r
}
-; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
-; an qvfmadd instead of a qvfnmsubs
define <4 x float> @goo_fmf(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-LABEL: goo_fmf:
; CHECK: # %bb.0: # %entry
@@ -188,12 +181,9 @@ define <4 x float> @goo_fmf(<4 x float>
; CHECK-NEXT: qvfrsqrtes 3, 2
; CHECK-NEXT: addi 3, 3, .LCPI6_0@toc@l
; CHECK-NEXT: qvlfsx 0, 0, 3
-; CHECK-NEXT: addis 3, 2, .LCPI6_1@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI6_1@toc@l
-; CHECK-NEXT: qvlfsx 4, 0, 3
-; CHECK-NEXT: qvfmadds 0, 2, 0, 2
-; CHECK-NEXT: qvfmuls 2, 3, 3
-; CHECK-NEXT: qvfmadds 0, 0, 2, 4
+; CHECK-NEXT: qvfmuls 4, 3, 3
+; CHECK-NEXT: qvfnmsubs 2, 2, 0, 2
+; CHECK-NEXT: qvfmadds 0, 2, 4, 0
; CHECK-NEXT: qvfmuls 0, 3, 0
; CHECK-NEXT: qvfmuls 1, 1, 0
; CHECK-NEXT: blr
@@ -360,8 +350,6 @@ entry:
ret <4 x double> %r
}
-; FIXME: We're currently loading two constants here (1.5 and -1.5), and using
-; an qvfmadds instead of a qvfnmsubs
define <4 x float> @goo3_fmf(<4 x float> %a) nounwind {
; CHECK-LABEL: goo3_fmf:
; CHECK: # %bb.0: # %entry
@@ -369,14 +357,11 @@ define <4 x float> @goo3_fmf(<4 x float>
; CHECK-NEXT: qvfrsqrtes 2, 1
; CHECK-NEXT: addi 3, 3, .LCPI14_1@toc@l
; CHECK-NEXT: qvlfsx 0, 0, 3
-; CHECK-NEXT: addis 3, 2, .LCPI14_2@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI14_2@toc@l
-; CHECK-NEXT: qvlfsx 3, 0, 3
; CHECK-NEXT: addis 3, 2, .LCPI14_0@toc@ha
-; CHECK-NEXT: qvfmuls 4, 2, 2
; CHECK-NEXT: addi 3, 3, .LCPI14_0@toc@l
-; CHECK-NEXT: qvfmadds 0, 1, 0, 1
-; CHECK-NEXT: qvfmadds 0, 0, 4, 3
+; CHECK-NEXT: qvfmuls 4, 2, 2
+; CHECK-NEXT: qvfnmsubs 3, 1, 0, 1
+; CHECK-NEXT: qvfmadds 0, 3, 4, 0
; CHECK-NEXT: qvlfsx 3, 0, 3
; CHECK-NEXT: qvfmuls 0, 2, 0
; CHECK-NEXT: qvfmuls 0, 0, 1
More information about the llvm-commits mailing list