[llvm] r360144 - [PowerPC] Use the two-constant NR algorithm for refining estimates

Tue May 7 06:48:03 PDT 2019

Author: nemanjai
Date: Tue May  7 06:48:03 2019
New Revision: 360144

URL: http://llvm.org/viewvc/llvm-project?rev=360144&view=rev
Log:
[PowerPC] Use the two-constant NR algorithm for refining estimates

The single-constant algorithm produces infinities on a lot of denormal values.
The precision of the two-constant algorithm is actually sufficient across the
range of denormals. We will switch to that algorithm for now to avoid the
infinities on denormals. In the future, we will re-evaluate the algorithm to
find the optimal one for PowerPC.

Differential revision: https://reviews.llvm.org/D60037

Modified:
    llvm/trunk/lib/Target/PowerPC/PPC.td
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCSubtarget.cpp
    llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h
    llvm/trunk/test/CodeGen/PowerPC/fma-mutate.ll
    llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll
    llvm/trunk/test/CodeGen/PowerPC/recipest.ll
    llvm/trunk/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll

Modified: llvm/trunk/lib/Target/PowerPC/PPC.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPC.td?rev=360144&r1=360143&r2=360144&view=diff
==============================================================================

--- llvm/trunk/lib/Target/PowerPC/PPC.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPC.td Tue May  7 06:48:03 2019
@@ -135,6 +135,9 @@ def FeatureQPX       : SubtargetFeature<
 def FeatureVSX       : SubtargetFeature<"vsx","HasVSX", "true",
                                         "Enable VSX instructions",
                                         [FeatureAltivec]>;
+def FeatureTwoConstNR :
+  SubtargetFeature<"two-const-nr", "NeedsTwoConstNR", "true",
+                   "Requires two constant Newton-Raphson computation">;
 def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
                                         "Enable POWER8 Altivec instructions",
                                         [FeatureAltivec]>;
@@ -227,7 +230,7 @@ def ProcessorFeatures {
        FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
        Feature64Bit /*, Feature64BitRegs */,
        FeatureBPERMD, FeatureExtDiv,
-       FeatureMFTB, DeprecatedDST];
+       FeatureMFTB, DeprecatedDST, FeatureTwoConstNR];
   list<SubtargetFeature> Power8SpecificFeatures =
       [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
        FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic,

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=360144&r1=360143&r2=360144&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue May  7 06:48:03 2019
@@ -11145,7 +11145,9 @@ SDValue PPCTargetLowering::getSqrtEstima
     if (RefinementSteps == ReciprocalEstimate::Unspecified)
       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
 
-    UseOneConstNR = true;
+    // The Newton-Raphson computation with a single constant does not provide
+    // enough accuracy on some CPUs.
+    UseOneConstNR = !Subtarget.needsTwoConstNR();
     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
   }
   return SDValue();

Modified: llvm/trunk/lib/Target/PowerPC/PPCSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCSubtarget.cpp?rev=360144&r1=360143&r2=360144&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCSubtarget.cpp Tue May  7 06:48:03 2019
@@ -67,6 +67,7 @@ void PPCSubtarget::initializeEnvironment
   HasFPU = false;
   HasQPX = false;
   HasVSX = false;
+  NeedsTwoConstNR = false;
   HasP8Vector = false;
   HasP8Altivec = false;
   HasP8Crypto = false;

Modified: llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h?rev=360144&r1=360143&r2=360144&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCSubtarget.h Tue May  7 06:48:03 2019
@@ -98,6 +98,7 @@ protected:
   bool HasSPE;
   bool HasQPX;
   bool HasVSX;
+  bool NeedsTwoConstNR;
   bool HasP8Vector;
   bool HasP8Altivec;
   bool HasP8Crypto;
@@ -246,6 +247,7 @@ public:
   bool hasFPU() const { return HasFPU; }
   bool hasQPX() const { return HasQPX; }
   bool hasVSX() const { return HasVSX; }
+  bool needsTwoConstNR() const { return NeedsTwoConstNR; }
   bool hasP8Vector() const { return HasP8Vector; }
   bool hasP8Altivec() const { return HasP8Altivec; }
   bool hasP8Crypto() const { return HasP8Crypto; }

Modified: llvm/trunk/test/CodeGen/PowerPC/fma-mutate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fma-mutate.ll?rev=360144&r1=360143&r2=360144&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/fma-mutate.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/fma-mutate.ll Tue May  7 06:48:03 2019
@@ -14,8 +14,7 @@ define double @foo3(double %a) nounwind
   ret double %r
 
 ; CHECK: @foo3
-; CHECK: fmr [[REG:[0-9]+]], [[REG2:[0-9]+]]
-; CHECK: xsnmsubadp [[REG]], {{[0-9]+}}, [[REG2]]
+; CHECK-NOT: fmr
 ; CHECK: xsmaddmdp
 ; CHECK: xsmaddadp
 }

Modified: llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll?rev=360144&r1=360143&r2=360144&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/fmf-propagation.ll Tue May  7 06:48:03 2019
@@ -284,16 +284,16 @@ define float @sqrt_afn(float %x) {
 ; FMF-NEXT:    fcmpu 0, 1, 0
 ; FMF-NEXT:    beq 0, .LBB10_2
 ; FMF-NEXT:  # %bb.1:
+; FMF-NEXT:    xsrsqrtesp 0, 1
 ; FMF-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
-; FMF-NEXT:    xsrsqrtesp 3, 1
-; FMF-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
-; FMF-NEXT:    xsmulsp 2, 1, 0
-; FMF-NEXT:    xsmulsp 4, 3, 3
-; FMF-NEXT:    xssubsp 2, 2, 1
-; FMF-NEXT:    xsmulsp 2, 2, 4
-; FMF-NEXT:    xssubsp 0, 0, 2
-; FMF-NEXT:    xsmulsp 0, 3, 0
-; FMF-NEXT:    xsmulsp 0, 0, 1
+; FMF-NEXT:    addis 4, 2, .LCPI10_1@toc@ha
+; FMF-NEXT:    lfs 2, .LCPI10_0@toc@l(3)
+; FMF-NEXT:    lfs 3, .LCPI10_1@toc@l(4)
+; FMF-NEXT:    xsmulsp 1, 1, 0
+; FMF-NEXT:    xsmulsp 0, 1, 0
+; FMF-NEXT:    xsmulsp 1, 1, 2
+; FMF-NEXT:    xsaddsp 0, 0, 3
+; FMF-NEXT:    xsmulsp 0, 1, 0
 ; FMF-NEXT:  .LBB10_2:
 ; FMF-NEXT:    fmr 1, 0
 ; FMF-NEXT:    blr
@@ -304,16 +304,15 @@ define float @sqrt_afn(float %x) {
 ; GLOBAL-NEXT:    fcmpu 0, 1, 0
 ; GLOBAL-NEXT:    beq 0, .LBB10_2
 ; GLOBAL-NEXT:  # %bb.1:
-; GLOBAL-NEXT:    xsrsqrtesp 2, 1
-; GLOBAL-NEXT:    fneg 0, 1
+; GLOBAL-NEXT:    xsrsqrtesp 0, 1
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
-; GLOBAL-NEXT:    fmr 4, 1
-; GLOBAL-NEXT:    lfs 3, .LCPI10_0@toc@l(3)
-; GLOBAL-NEXT:    xsmaddasp 4, 0, 3
-; GLOBAL-NEXT:    xsmulsp 0, 2, 2
-; GLOBAL-NEXT:    xsmaddasp 3, 4, 0
-; GLOBAL-NEXT:    xsmulsp 0, 2, 3
-; GLOBAL-NEXT:    xsmulsp 0, 0, 1
+; GLOBAL-NEXT:    addis 4, 2, .LCPI10_1@toc@ha
+; GLOBAL-NEXT:    lfs 2, .LCPI10_0@toc@l(3)
+; GLOBAL-NEXT:    lfs 3, .LCPI10_1@toc@l(4)
+; GLOBAL-NEXT:    xsmulsp 1, 1, 0
+; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
+; GLOBAL-NEXT:    xsmulsp 0, 1, 3
+; GLOBAL-NEXT:    xsmulsp 0, 0, 2
 ; GLOBAL-NEXT:  .LBB10_2:
 ; GLOBAL-NEXT:    fmr 1, 0
 ; GLOBAL-NEXT:    blr
@@ -338,16 +337,15 @@ define float @sqrt_fast(float %x) {
 ; FMF-NEXT:    fcmpu 0, 1, 0
 ; FMF-NEXT:    beq 0, .LBB11_2
 ; FMF-NEXT:  # %bb.1:
-; FMF-NEXT:    xsrsqrtesp 2, 1
-; FMF-NEXT:    fneg 0, 1
+; FMF-NEXT:    xsrsqrtesp 0, 1
 ; FMF-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
-; FMF-NEXT:    fmr 4, 1
-; FMF-NEXT:    lfs 3, .LCPI11_0@toc@l(3)
-; FMF-NEXT:    xsmaddasp 4, 0, 3
-; FMF-NEXT:    xsmulsp 0, 2, 2
-; FMF-NEXT:    xsmaddasp 3, 4, 0
-; FMF-NEXT:    xsmulsp 0, 2, 3
-; FMF-NEXT:    xsmulsp 0, 0, 1
+; FMF-NEXT:    addis 4, 2, .LCPI11_1@toc@ha
+; FMF-NEXT:    lfs 2, .LCPI11_0@toc@l(3)
+; FMF-NEXT:    lfs 3, .LCPI11_1@toc@l(4)
+; FMF-NEXT:    xsmulsp 1, 1, 0
+; FMF-NEXT:    xsmaddasp 2, 1, 0
+; FMF-NEXT:    xsmulsp 0, 1, 3
+; FMF-NEXT:    xsmulsp 0, 0, 2
 ; FMF-NEXT:  .LBB11_2:
 ; FMF-NEXT:    fmr 1, 0
 ; FMF-NEXT:    blr
@@ -358,16 +356,15 @@ define float @sqrt_fast(float %x) {
 ; GLOBAL-NEXT:    fcmpu 0, 1, 0
 ; GLOBAL-NEXT:    beq 0, .LBB11_2
 ; GLOBAL-NEXT:  # %bb.1:
-; GLOBAL-NEXT:    xsrsqrtesp 2, 1
-; GLOBAL-NEXT:    fneg 0, 1
+; GLOBAL-NEXT:    xsrsqrtesp 0, 1
 ; GLOBAL-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
-; GLOBAL-NEXT:    fmr 4, 1
-; GLOBAL-NEXT:    lfs 3, .LCPI11_0@toc@l(3)
-; GLOBAL-NEXT:    xsmaddasp 4, 0, 3
-; GLOBAL-NEXT:    xsmulsp 0, 2, 2
-; GLOBAL-NEXT:    xsmaddasp 3, 4, 0
-; GLOBAL-NEXT:    xsmulsp 0, 2, 3
-; GLOBAL-NEXT:    xsmulsp 0, 0, 1
+; GLOBAL-NEXT:    addis 4, 2, .LCPI11_1@toc@ha
+; GLOBAL-NEXT:    lfs 2, .LCPI11_0@toc@l(3)
+; GLOBAL-NEXT:    lfs 3, .LCPI11_1@toc@l(4)
+; GLOBAL-NEXT:    xsmulsp 1, 1, 0
+; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
+; GLOBAL-NEXT:    xsmulsp 0, 1, 3
+; GLOBAL-NEXT:    xsmulsp 0, 0, 2
 ; GLOBAL-NEXT:  .LBB11_2:
 ; GLOBAL-NEXT:    fmr 1, 0
 ; GLOBAL-NEXT:    blr

Modified: llvm/trunk/test/CodeGen/PowerPC/recipest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/recipest.ll?rev=360144&r1=360143&r2=360144&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/recipest.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/recipest.ll Tue May  7 06:48:03 2019
@@ -14,15 +14,16 @@ define double @foo(double %a, double %b)
   ret double %r
 
 ; CHECK: @foo
-; CHECK-DAG: frsqrte
-; CHECK-DAG: fnmsub
+; CHECK: frsqrte
 ; CHECK: fmul
 ; CHECK-NEXT: fmadd
 ; CHECK-NEXT: fmul
 ; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
 ; CHECK-NEXT: fmadd
 ; CHECK-NEXT: fmul
 ; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
 ; CHECK: blr
 
 ; CHECK-SAFE: @foo
@@ -53,10 +54,10 @@ define double @foof(double %a, float %b)
 
 ; CHECK: @foof
 ; CHECK-DAG: frsqrtes
-; CHECK-DAG: fnmsubs
 ; CHECK: fmuls
 ; CHECK-NEXT: fmadds
 ; CHECK-NEXT: fmuls
+; CHECK-NEXT: fmuls
 ; CHECK-NEXT: fmul
 ; CHECK-NEXT: blr
 
@@ -74,13 +75,14 @@ define float @food(float %a, double %b)
 
 ; CHECK: @foo
 ; CHECK-DAG: frsqrte
-; CHECK-DAG: fnmsub
 ; CHECK: fmul
 ; CHECK-NEXT: fmadd
 ; CHECK-NEXT: fmul
 ; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
 ; CHECK-NEXT: fmadd
 ; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
 ; CHECK-NEXT: frsp
 ; CHECK-NEXT: fmuls
 ; CHECK-NEXT: blr
@@ -98,11 +100,11 @@ define float @goo(float %a, float %b) no
 
 ; CHECK: @goo
 ; CHECK-DAG: frsqrtes
-; CHECK-DAG: fnmsubs
 ; CHECK: fmuls
 ; CHECK-NEXT: fmadds
 ; CHECK-NEXT: fmuls
 ; CHECK-NEXT: fmuls
+; CHECK-NEXT: fmuls
 ; CHECK-NEXT: blr
 
 ; CHECK-SAFE: @goo
@@ -138,7 +140,6 @@ define float @rsqrt_fmul(float %a, float
 ; CHECK-DAG: fres
 ; CHECK-DAG: fnmsubs
 ; CHECK-DAG: fmuls
-; CHECK-DAG: fnmsubs
 ; CHECK-DAG: fmadds
 ; CHECK-DAG: fmadds
 ; CHECK: fmuls
@@ -219,11 +220,11 @@ define double @foo3(double %a) nounwind
 ; CHECK: @foo3
 ; CHECK: fcmpu
 ; CHECK-DAG: frsqrte
-; CHECK-DAG: fnmsub
 ; CHECK: fmul
 ; CHECK-NEXT: fmadd
 ; CHECK-NEXT: fmul
 ; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
 ; CHECK-NEXT: fmadd
 ; CHECK-NEXT: fmul
 ; CHECK-NEXT: fmul
@@ -241,7 +242,6 @@ define float @goo3(float %a) nounwind {
 ; CHECK: @goo3
 ; CHECK: fcmpu
 ; CHECK-DAG: frsqrtes
-; CHECK-DAG: fnmsubs
 ; CHECK: fmuls
 ; CHECK-NEXT: fmadds
 ; CHECK-NEXT: fmuls

Modified: llvm/trunk/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll?rev=360144&r1=360143&r2=360144&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll Tue May  7 06:48:03 2019
@@ -8,7 +8,7 @@ entry:
   br i1 undef, label %for.body.lr.ph, label %for.end
 
 ; CHECK-LABEL: @LSH_recall_init
-; CHECK: xsnmsubadp
+; CHECK: xsmaddadp
 
 for.body.lr.ph:                                   ; preds = %entry
   %conv3 = fpext float %W to double