[llvm-commits] [dragonegg] r132709 - in /dragonegg/trunk/src/x86: Target.cpp x86_builtins
Duncan Sands
baldrick at free.fr
Mon Jun 6 23:44:40 PDT 2011
Author: baldrick
Date: Tue Jun 7 01:44:40 2011
New Revision: 132709
URL: http://llvm.org/viewvc/llvm-project?rev=132709&view=rev
Log:
Implement rsqrtps_nr (rsqrtps with a Newton-Raphson step).
Modified:
dragonegg/trunk/src/x86/Target.cpp
dragonegg/trunk/src/x86/x86_builtins
Modified: dragonegg/trunk/src/x86/Target.cpp
URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/src/x86/Target.cpp?rev=132709&r1=132708&r2=132709&view=diff
==============================================================================
--- dragonegg/trunk/src/x86/Target.cpp (original)
+++ dragonegg/trunk/src/x86/Target.cpp Tue Jun 7 01:44:40 2011
@@ -776,6 +776,20 @@
SI->setAlignment(16);
return true;
}
+ case rsqrtps_nr: {
+ // rsqrtps followed by one Newton-Raphson step to improve accuracy. With r = rsqrtps(x):
+ // rsqrtps_nr(x) = r * -0.5 * (r * x * r - 3.0), which equals 0.5 * r * (3.0 - x * r * r).
+ Function *rsqrtps = Intrinsic::getDeclaration(TheModule,
+ Intrinsic::x86_sse_rsqrt_ps);
+ Value *X = Ops[0]; // x, the builtin's single operand
+ Value *R = Builder.CreateCall(rsqrtps, X); // r = rsqrtps(x), the initial estimate
+ Value *RHS = Builder.CreateFAdd(Builder.CreateFMul(Builder.CreateFMul(R, X),
+ R),
+ ConstantFP::get(X->getType(), -3.0)); // (r * x) * r - 3.0; constant uses x's type
+ Value *LHS = Builder.CreateFMul(R, ConstantFP::get(X->getType(), -0.5)); // r * -0.5
+ Result = Builder.CreateFMul(LHS, RHS); // (r * -0.5) * (r * x * r - 3.0), the refined estimate
+ return true;
+ }
case sqrtps_nr: {
// Turn this into sqrtps without a Newton-Raphson step - sqrtps is already
// accurate enough.
Modified: dragonegg/trunk/src/x86/x86_builtins
URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/src/x86/x86_builtins?rev=132709&r1=132708&r2=132709&view=diff
==============================================================================
--- dragonegg/trunk/src/x86/x86_builtins (original)
+++ dragonegg/trunk/src/x86/x86_builtins Tue Jun 7 01:44:40 2011
@@ -515,7 +515,7 @@
//DEFINE_BUILTIN(rsqrtf),
//DEFINE_BUILTIN(rsqrtps),
//DEFINE_BUILTIN(rsqrtps256),
-//DEFINE_BUILTIN(rsqrtps_nr),
+DEFINE_BUILTIN(rsqrtps_nr),
//DEFINE_BUILTIN(rsqrtps_nr256),
//DEFINE_BUILTIN(rsqrtss),
//DEFINE_BUILTIN(sfence),
More information about the llvm-commits
mailing list