[libc-commits] [libc] [libc][math] Use Estrin's scheme to improve asinf and acosf performance. (PR #187885)

via libc-commits libc-commits at lists.llvm.org
Sat Mar 21 13:33:29 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libc

Author: None (lntue)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/187885.diff


3 Files Affected:

- (modified) libc/src/__support/math/acosf.h (+3-3) 
- (modified) libc/src/__support/math/asinf.h (+11-47) 
- (modified) libc/src/__support/math/inv_trigf_utils.h (+20-11) 


``````````diff
diff --git a/libc/src/__support/math/acosf.h b/libc/src/__support/math/acosf.h
index 4cdf8ef704f25..c17b5dbf3160f 100644
--- a/libc/src/__support/math/acosf.h
+++ b/libc/src/__support/math/acosf.h
@@ -77,10 +77,10 @@ LIBC_INLINE constexpr float acosf(float x) {
 
     // For |x| <= 0.5, we approximate acosf(x) by:
     //   acos(x) = pi/2 - asin(x) = pi/2 - x * P(x^2)
-    // Where P(X^2) = Q(X) is a degree-20 minimax even polynomial approximating
+    // Where P(X^2) = Q(X) is a degree-24 minimax even polynomial approximating
     // asin(x)/x on [0, 0.5] generated by Sollya with:
-    // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
-    //                 [|1, D...|], [0, 0.5]);
+    // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20,
+    //                              22, 24|], [|1, D...|], [0, 0.5]);
     double xd = static_cast<double>(x);
     double xsq = xd * xd;
     double x3 = xd * xsq;
diff --git a/libc/src/__support/math/asinf.h b/libc/src/__support/math/asinf.h
index c00b764a41853..08cfb192754e4 100644
--- a/libc/src/__support/math/asinf.h
+++ b/libc/src/__support/math/asinf.h
@@ -25,36 +25,12 @@ namespace math {
 
 LIBC_INLINE constexpr float asinf(float x) {
   using namespace inv_trigf_utils_internal;
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-  constexpr size_t N_EXCEPTS = 2;
-
-  // Exceptional values when |x| <= 0.5
-  constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_LO = {{
-      // (inputs, RZ output, RU offset, RD offset, RN offset)
-      // x = 0x1.137f0cp-5, asinf(x) = 0x1.138c58p-5 (RZ)
-      {0x3d09bf86, 0x3d09c62c, 1, 0, 1},
-      // x = 0x1.cbf43cp-4, asinf(x) = 0x1.cced1cp-4 (RZ)
-      {0x3de5fa1e, 0x3de6768e, 1, 0, 0},
-  }};
-
-  // Exceptional values when 0.5 < |x| <= 1
-  constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_HI = {{
-      // (inputs, RZ output, RU offset, RD offset, RN offset)
-      // x = 0x1.107434p-1, asinf(x) = 0x1.1f4b64p-1 (RZ)
-      {0x3f083a1a, 0x3f0fa5b2, 1, 0, 0},
-      // x = 0x1.ee836cp-1, asinf(x) = 0x1.4f0654p0 (RZ)
-      {0x3f7741b6, 0x3fa7832a, 1, 0, 0},
-  }};
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-  using namespace inv_trigf_utils_internal;
   using FPBits = typename fputil::FPBits<float>;
 
   FPBits xbits(x);
   uint32_t x_uint = xbits.uintval();
   uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU;
-  constexpr double SIGN[2] = {1.0, -1.0};
+  constexpr double TWO[2] = {-2.0, 2.0};
   uint32_t x_sign = x_uint >> 31;
 
   // |x| <= 0.5-ish
@@ -89,19 +65,12 @@ LIBC_INLINE constexpr float asinf(float x) {
 #endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
     }
 
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-    // Check for exceptional values
-    if (auto r = ASINF_EXCEPTS_LO.lookup_odd(x_abs, x_sign);
-        LIBC_UNLIKELY(r.has_value()))
-      return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
     // For |x| <= 0.5, we approximate asinf(x) by:
     //   asin(x) = x * P(x^2)
-    // Where P(X^2) = Q(X) is a degree-20 minimax even polynomial approximating
+    // Where P(X^2) = Q(X) is a degree-24 minimax even polynomial approximating
     // asin(x)/x on [0, 0.5] generated by Sollya with:
-    // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
-    //                 [|1, D...|], [0, 0.5]);
+    // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20,
+    //                              22, 24|], [|1, D...|], [0, 0.5]);
     // An exhaustive test shows that this approximation works well up to a
     // little more than 0.5.
     double xd = static_cast<double>(x);
@@ -126,13 +95,6 @@ LIBC_INLINE constexpr float asinf(float x) {
     return FPBits::quiet_nan().get_val();
   }
 
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-  // Check for exceptional values
-  if (auto r = ASINF_EXCEPTS_HI.lookup_odd(x_abs, x_sign);
-      LIBC_UNLIKELY(r.has_value()))
-    return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
   // When |x| > 0.5, we perform range reduction as follow:
   //
   // Assume further that 0.5 < x <= 1, and let:
@@ -156,12 +118,14 @@ LIBC_INLINE constexpr float asinf(float x) {
   // |x| <= 0.5:
   //   asin(x) ~ pi/2 - 2 * sqrt(u) * P(u),
 
+  constexpr double M_PI_OVER_4 = -0x1.921fb54442d18p-1;
+
   xbits.set_sign(Sign::POS);
-  double sign = SIGN[x_sign];
-  double xd = static_cast<double>(xbits.get_val());
-  double u = fputil::multiply_add(-0.5, xd, 0.5);
-  double c1 = sign * (-2 * fputil::sqrt<double>(u));
-  double c2 = fputil::multiply_add(sign, M_MATH_PI_2, c1);
+  double sign_two = TWO[x_sign]; // sign * (-2)
+  double uf = fputil::multiply_add(-0.5f, xbits.get_val(), 0.5f);
+  double u = static_cast<double>(uf);
+  double c1 = sign_two * fputil::sqrt<double>(u);
+  double c2 = fputil::multiply_add(sign_two, M_PI_OVER_4, c1);
   double c3 = c1 * u;
 
   double r = asin_eval(u);
diff --git a/libc/src/__support/math/inv_trigf_utils.h b/libc/src/__support/math/inv_trigf_utils.h
index 83fe9f7f6201c..558d91760f8f9 100644
--- a/libc/src/__support/math/inv_trigf_utils.h
+++ b/libc/src/__support/math/inv_trigf_utils.h
@@ -159,22 +159,31 @@ LIBC_INLINE double atan_eval_no_table(double num, double den,
   return fputil::multiply_add(q3, d, q);
 }
 
-// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
+// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24|],
 //                 [|1, D...|], [0, 0.5]);
-LIBC_INLINE_VAR constexpr double ASIN_COEFFS[10] = {
-    0x1.5555555540fa1p-3, 0x1.333333512edc2p-4, 0x1.6db6cc1541b31p-5,
-    0x1.f1caff324770ep-6, 0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6,
-    0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6, -0x1.df946fa875ddp-8,
-    0x1.02311ecf99c28p-5};
+// > dirtyinfnorm((asin(x) - x*Q)/asin(x), [0, 0.5]);
+// 0x1.1ff...p-56
+LIBC_INLINE_VAR constexpr double ASIN_COEFFS[12] = {
+    0x1.555555555538p-3,  0x1.333333336fd5bp-4,  0x1.6db6db41ce4bcp-5,
+    0x1.f1c72c66896dep-6, 0x1.6e89f0a0ac64bp-6,  0x1.1c6c111de4074p-6,
+    0x1.c6fa84b5699acp-7, 0x1.8ed60a3e6dd19p-7,  0x1.ab3a090750049p-8,
+    0x1.405213cd1ef46p-6, -0x1.0a5a381f73f65p-6, 0x1.05985a32a9045p-5,
+};
 
 // Evaluate P(x^2) - 1, where P(x^2) ~ asin(x)/x
 LIBC_INLINE double asin_eval(double xsq) {
   double x4 = xsq * xsq;
-  double r1 = fputil::polyeval(x4, ASIN_COEFFS[0], ASIN_COEFFS[2],
-                               ASIN_COEFFS[4], ASIN_COEFFS[6], ASIN_COEFFS[8]);
-  double r2 = fputil::polyeval(x4, ASIN_COEFFS[1], ASIN_COEFFS[3],
-                               ASIN_COEFFS[5], ASIN_COEFFS[7], ASIN_COEFFS[9]);
-  return fputil::multiply_add(xsq, r2, r1);
+  double c0 = fputil::multiply_add(xsq, ASIN_COEFFS[1], ASIN_COEFFS[0]);
+  double c1 = fputil::multiply_add(xsq, ASIN_COEFFS[3], ASIN_COEFFS[2]);
+  double c2 = fputil::multiply_add(xsq, ASIN_COEFFS[5], ASIN_COEFFS[4]);
+  double c3 = fputil::multiply_add(xsq, ASIN_COEFFS[7], ASIN_COEFFS[6]);
+  double c4 = fputil::multiply_add(xsq, ASIN_COEFFS[9], ASIN_COEFFS[8]);
+  double c5 = fputil::multiply_add(xsq, ASIN_COEFFS[11], ASIN_COEFFS[10]);
+  double x8 = x4 * x4;
+  double d0 = fputil::multiply_add(x4, c1, c0);
+  double d1 = fputil::multiply_add(x4, c3, c2);
+  double d2 = fputil::multiply_add(x4, c5, c4);
+  return fputil::polyeval(x8, d0, d1, d2);
 }
 
 // the coefficients for the polynomial approximation of asin(x)/(pi*x) in the

``````````

</details>


https://github.com/llvm/llvm-project/pull/187885


More information about the libc-commits mailing list