[llvm-commits] [dragonegg] r132710 - in /dragonegg/trunk/src/x86: Target.cpp x86_builtins

Duncan Sands baldrick at free.fr
Tue Jun 7 07:16:28 PDT 2011


Author: baldrick
Date: Tue Jun  7 09:16:28 2011
New Revision: 132710

URL: http://llvm.org/viewvc/llvm-project?rev=132710&view=rev
Log:
Implement rsqrtf.  Note that while this works fine for float and
double operands, it fails miserably for long double.  However the
long double code is identical to gcc's, so this failure is OK :)

Modified:
    dragonegg/trunk/src/x86/Target.cpp
    dragonegg/trunk/src/x86/x86_builtins

Modified: dragonegg/trunk/src/x86/Target.cpp
URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/src/x86/Target.cpp?rev=132710&r1=132709&r2=132710&view=diff
==============================================================================
--- dragonegg/trunk/src/x86/Target.cpp (original)
+++ dragonegg/trunk/src/x86/Target.cpp Tue Jun  7 09:16:28 2011
@@ -776,6 +776,33 @@
     SI->setAlignment(16);
     return true;
   }
+  case rsqrtf: {
+    // rsqrtss with a Newton-Raphson step to improve accuracy:
+    //   rsqrtf(x) = rsqrtss(x) * -0.5 * (rsqrtss(x) * x * rsqrtss(x) - 3.0)
+    Function *rsqrtss = Intrinsic::getDeclaration(TheModule,
+                                                  Intrinsic::x86_sse_rsqrt_ss);
+    // As rsqrtss is declared as taking a <4 x float> operand, truncate the
+    // operand to float and insert it as element 0 of an otherwise undef vector.
+    Value *X = Ops[0];
+    const Type *FloatTy = Type::getFloatTy(Context);
+    Value *AsFloat = Builder.CreateFPTrunc(X, FloatTy);
+    const Type *V4SFTy = VectorType::get(FloatTy, 4);
+    Value *AsVec = Builder.CreateInsertElement(UndefValue::get(V4SFTy), AsFloat,
+                                               Builder.getInt32(0));
+    // Take the reciprocal square root of the vector, extract element 0 and
+    // extend it back into a scalar of the operand's original type.
+    AsVec = Builder.CreateCall(rsqrtss, AsVec);
+    Value *R = Builder.CreateExtractElement(AsVec, Builder.getInt32(0));
+    R = Builder.CreateFPExt(R, X->getType()); // rsqrtss(x), in X's type
+
+    // Perform the Newton-Raphson step: (R * -0.5) * ((R * X) * R + -3.0).
+    Value *RHS = Builder.CreateFAdd(Builder.CreateFMul(Builder.CreateFMul(R, X),
+                                                       R),
+                                    ConstantFP::get(X->getType(), -3.0));
+    Value *LHS = Builder.CreateFMul(R, ConstantFP::get(X->getType(), -0.5));
+    Result = Builder.CreateFMul(LHS, RHS);
+    return true;
+  }
   case rsqrtps_nr: {
     // rsqrtps with a Newton-Raphson step to improve accuracy:
     //   rsqrtps_nr(x) = rsqrtps(x) * -0.5 * (rsqrtps(x) * x * rsqrtps(x) - 3.0)

Modified: dragonegg/trunk/src/x86/x86_builtins
URL: http://llvm.org/viewvc/llvm-project/dragonegg/trunk/src/x86/x86_builtins?rev=132710&r1=132709&r2=132710&view=diff
==============================================================================
--- dragonegg/trunk/src/x86/x86_builtins (original)
+++ dragonegg/trunk/src/x86/x86_builtins Tue Jun  7 09:16:28 2011
@@ -512,7 +512,7 @@
 //DEFINE_BUILTIN(roundps256),
 //DEFINE_BUILTIN(roundsd),
 //DEFINE_BUILTIN(roundss),
-//DEFINE_BUILTIN(rsqrtf),
+DEFINE_BUILTIN(rsqrtf),
 //DEFINE_BUILTIN(rsqrtps),
 //DEFINE_BUILTIN(rsqrtps256),
 DEFINE_BUILTIN(rsqrtps_nr),





More information about the llvm-commits mailing list