[llvm] r221819 - Expose the number of Newton-Raphson iterations applied to the hardware's reciprocal estimate as a parameter (x86).

Sanjay Patel spatel at rotateright.com
Wed Nov 12 13:39:01 PST 2014


Author: spatel
Date: Wed Nov 12 15:39:01 2014
New Revision: 221819

URL: http://llvm.org/viewvc/llvm-project?rev=221819&view=rev
Log:
Expose the number of Newton-Raphson iterations applied to the hardware's reciprocal estimate as a parameter (x86).

This is a follow-on to r221706 and r221731, and is discussed in more detail in PR21385.

This patch also loosens the testcase checking for btver2. We know that the "1.0" will be loaded, but
we can't tell exactly when, so replace the CHECK-NEXT specifiers with plain CHECKs. The CHECK-NEXT
sequence relied on a quirk of post-RA-scheduling that may change independently of anything in these tests.


Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/recip-fastmath.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=221819&r1=221818&r2=221819&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Nov 12 15:39:01 2014
@@ -71,6 +71,12 @@ static cl::opt<bool> ExperimentalVectorS
     cl::desc("Enable an experimental vector shuffle lowering code path."),
     cl::Hidden);
 
+static cl::opt<int> ReciprocalEstimateRefinementSteps(
+    "x86-recip-refinement-steps", cl::init(1),
+    cl::desc("Specify the number of Newton-Raphson iterations applied to the "
+             "result of the hardware reciprocal estimate instruction."),
+    cl::NotHidden);
+
 // Forward declarations.
 static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
                        SDValue V2);
@@ -14543,9 +14549,7 @@ SDValue X86TargetLowering::getRecipEstim
   // along with FMA, this could be a throughput win.
   if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
       (Subtarget->hasAVX() && VT == MVT::v8f32)) {
-    // TODO: Expose this as a user-configurable parameter to allow for
-    // speed vs. accuracy flexibility.
-    RefinementSteps = 1;
+    RefinementSteps = ReciprocalEstimateRefinementSteps;
     return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
   }
   return SDValue();

Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath.ll?rev=221819&r1=221818&r2=221819&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath.ll Wed Nov 12 15:39:01 2014
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+use-recip-est,+avx -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE
 
 ; If the target's divss/divps instructions are substantially
 ; slower than rcpss/rcpps with a Newton-Raphson refinement,
@@ -21,11 +22,23 @@ define float @reciprocal_estimate(float
 
 ; BTVER2-LABEL: reciprocal_estimate:
 ; BTVER2: vrcpss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: vsubss
-; BTVER2-NEXT: vmulss
-; BTVER2-NEXT: vaddss
+; BTVER2: vmulss
+; BTVER2: vsubss
+; BTVER2: vmulss
+; BTVER2: vaddss
 ; BTVER2-NEXT: retq
+
+; REFINE-LABEL: reciprocal_estimate:
+; REFINE: vrcpss
+; REFINE: vmulss
+; REFINE: vsubss
+; REFINE: vmulss
+; REFINE: vaddss
+; REFINE: vmulss
+; REFINE: vsubss
+; REFINE: vmulss
+; REFINE: vaddss
+; REFINE-NEXT: retq
 }
 
 define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
@@ -40,11 +53,23 @@ define <4 x float> @reciprocal_estimate_
 
 ; BTVER2-LABEL: reciprocal_estimate_v4f32:
 ; BTVER2: vrcpps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vsubps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vaddps
+; BTVER2: vmulps
+; BTVER2: vsubps
+; BTVER2: vmulps
+; BTVER2: vaddps
 ; BTVER2-NEXT: retq
+
+; REFINE-LABEL: reciprocal_estimate_v4f32:
+; REFINE: vrcpps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE-NEXT: retq
 }
 
 define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
@@ -62,11 +87,23 @@ define <8 x float> @reciprocal_estimate_
 
 ; BTVER2-LABEL: reciprocal_estimate_v8f32:
 ; BTVER2: vrcpps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vsubps
-; BTVER2-NEXT: vmulps
-; BTVER2-NEXT: vaddps
+; BTVER2: vmulps
+; BTVER2: vsubps
+; BTVER2: vmulps
+; BTVER2: vaddps
 ; BTVER2-NEXT: retq
+
+; REFINE-LABEL: reciprocal_estimate_v8f32:
+; REFINE: vrcpps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE-NEXT: retq
 }
 
 attributes #0 = { "unsafe-fp-math"="true" }





More information about the llvm-commits mailing list