[libc-commits] [libc] [libc][math] Use Estrin's scheme to improve asinf and acosf performance. (PR #187885)
via libc-commits
libc-commits at lists.llvm.org
Sat Mar 21 13:33:29 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
Author: None (lntue)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/187885.diff
3 Files Affected:
- (modified) libc/src/__support/math/acosf.h (+3-3)
- (modified) libc/src/__support/math/asinf.h (+11-47)
- (modified) libc/src/__support/math/inv_trigf_utils.h (+20-11)
``````````diff
diff --git a/libc/src/__support/math/acosf.h b/libc/src/__support/math/acosf.h
index 4cdf8ef704f25..c17b5dbf3160f 100644
--- a/libc/src/__support/math/acosf.h
+++ b/libc/src/__support/math/acosf.h
@@ -77,10 +77,10 @@ LIBC_INLINE constexpr float acosf(float x) {
// For |x| <= 0.5, we approximate acosf(x) by:
// acos(x) = pi/2 - asin(x) = pi/2 - x * P(x^2)
- // Where P(X^2) = Q(X) is a degree-20 minimax even polynomial approximating
+ // Where P(X^2) = Q(X) is a degree-24 minimax even polynomial approximating
// asin(x)/x on [0, 0.5] generated by Sollya with:
- // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
- // [|1, D...|], [0, 0.5]);
+ // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20,
+ // 22, 24|], [|1, D...|], [0, 0.5]);
double xd = static_cast<double>(x);
double xsq = xd * xd;
double x3 = xd * xsq;
diff --git a/libc/src/__support/math/asinf.h b/libc/src/__support/math/asinf.h
index c00b764a41853..08cfb192754e4 100644
--- a/libc/src/__support/math/asinf.h
+++ b/libc/src/__support/math/asinf.h
@@ -25,36 +25,12 @@ namespace math {
LIBC_INLINE constexpr float asinf(float x) {
using namespace inv_trigf_utils_internal;
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- constexpr size_t N_EXCEPTS = 2;
-
- // Exceptional values when |x| <= 0.5
- constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_LO = {{
- // (inputs, RZ output, RU offset, RD offset, RN offset)
- // x = 0x1.137f0cp-5, asinf(x) = 0x1.138c58p-5 (RZ)
- {0x3d09bf86, 0x3d09c62c, 1, 0, 1},
- // x = 0x1.cbf43cp-4, asinf(x) = 0x1.cced1cp-4 (RZ)
- {0x3de5fa1e, 0x3de6768e, 1, 0, 0},
- }};
-
- // Exceptional values when 0.5 < |x| <= 1
- constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINF_EXCEPTS_HI = {{
- // (inputs, RZ output, RU offset, RD offset, RN offset)
- // x = 0x1.107434p-1, asinf(x) = 0x1.1f4b64p-1 (RZ)
- {0x3f083a1a, 0x3f0fa5b2, 1, 0, 0},
- // x = 0x1.ee836cp-1, asinf(x) = 0x1.4f0654p0 (RZ)
- {0x3f7741b6, 0x3fa7832a, 1, 0, 0},
- }};
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
- using namespace inv_trigf_utils_internal;
using FPBits = typename fputil::FPBits<float>;
FPBits xbits(x);
uint32_t x_uint = xbits.uintval();
uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU;
- constexpr double SIGN[2] = {1.0, -1.0};
+ constexpr double TWO[2] = {-2.0, 2.0};
uint32_t x_sign = x_uint >> 31;
// |x| <= 0.5-ish
@@ -89,19 +65,12 @@ LIBC_INLINE constexpr float asinf(float x) {
#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
}
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- // Check for exceptional values
- if (auto r = ASINF_EXCEPTS_LO.lookup_odd(x_abs, x_sign);
- LIBC_UNLIKELY(r.has_value()))
- return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
// For |x| <= 0.5, we approximate asinf(x) by:
// asin(x) = x * P(x^2)
- // Where P(X^2) = Q(X) is a degree-20 minimax even polynomial approximating
+ // Where P(X^2) = Q(X) is a degree-24 minimax even polynomial approximating
// asin(x)/x on [0, 0.5] generated by Sollya with:
- // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
- // [|1, D...|], [0, 0.5]);
+ // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20,
+ // 22, 24|], [|1, D...|], [0, 0.5]);
// An exhaustive test shows that this approximation works well up to a
// little more than 0.5.
double xd = static_cast<double>(x);
@@ -126,13 +95,6 @@ LIBC_INLINE constexpr float asinf(float x) {
return FPBits::quiet_nan().get_val();
}
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- // Check for exceptional values
- if (auto r = ASINF_EXCEPTS_HI.lookup_odd(x_abs, x_sign);
- LIBC_UNLIKELY(r.has_value()))
- return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
// When |x| > 0.5, we perform range reduction as follow:
//
// Assume further that 0.5 < x <= 1, and let:
@@ -156,12 +118,14 @@ LIBC_INLINE constexpr float asinf(float x) {
// |x| <= 0.5:
// asin(x) ~ pi/2 - 2 * sqrt(u) * P(u),
+ constexpr double M_PI_OVER_4 = -0x1.921fb54442d18p-1;
+
xbits.set_sign(Sign::POS);
- double sign = SIGN[x_sign];
- double xd = static_cast<double>(xbits.get_val());
- double u = fputil::multiply_add(-0.5, xd, 0.5);
- double c1 = sign * (-2 * fputil::sqrt<double>(u));
- double c2 = fputil::multiply_add(sign, M_MATH_PI_2, c1);
+ double sign_two = TWO[x_sign]; // sign * (-2)
+ double uf = fputil::multiply_add(-0.5f, xbits.get_val(), 0.5f);
+ double u = static_cast<double>(uf);
+ double c1 = sign_two * fputil::sqrt<double>(u);
+ double c2 = fputil::multiply_add(sign_two, M_PI_OVER_4, c1);
double c3 = c1 * u;
double r = asin_eval(u);
diff --git a/libc/src/__support/math/inv_trigf_utils.h b/libc/src/__support/math/inv_trigf_utils.h
index 83fe9f7f6201c..558d91760f8f9 100644
--- a/libc/src/__support/math/inv_trigf_utils.h
+++ b/libc/src/__support/math/inv_trigf_utils.h
@@ -159,22 +159,31 @@ LIBC_INLINE double atan_eval_no_table(double num, double den,
return fputil::multiply_add(q3, d, q);
}
-// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
+// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24|],
// [|1, D...|], [0, 0.5]);
-LIBC_INLINE_VAR constexpr double ASIN_COEFFS[10] = {
- 0x1.5555555540fa1p-3, 0x1.333333512edc2p-4, 0x1.6db6cc1541b31p-5,
- 0x1.f1caff324770ep-6, 0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6,
- 0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6, -0x1.df946fa875ddp-8,
- 0x1.02311ecf99c28p-5};
+// > dirtyinfnorm((asin(x) - x*Q)/asin(x), [0, 0.5]);
+// 0x1.1ff...p-56
+LIBC_INLINE_VAR constexpr double ASIN_COEFFS[12] = {
+ 0x1.555555555538p-3, 0x1.333333336fd5bp-4, 0x1.6db6db41ce4bcp-5,
+ 0x1.f1c72c66896dep-6, 0x1.6e89f0a0ac64bp-6, 0x1.1c6c111de4074p-6,
+ 0x1.c6fa84b5699acp-7, 0x1.8ed60a3e6dd19p-7, 0x1.ab3a090750049p-8,
+ 0x1.405213cd1ef46p-6, -0x1.0a5a381f73f65p-6, 0x1.05985a32a9045p-5,
+};
// Evaluate P(x^2) - 1, where P(x^2) ~ asin(x)/x
LIBC_INLINE double asin_eval(double xsq) {
double x4 = xsq * xsq;
- double r1 = fputil::polyeval(x4, ASIN_COEFFS[0], ASIN_COEFFS[2],
- ASIN_COEFFS[4], ASIN_COEFFS[6], ASIN_COEFFS[8]);
- double r2 = fputil::polyeval(x4, ASIN_COEFFS[1], ASIN_COEFFS[3],
- ASIN_COEFFS[5], ASIN_COEFFS[7], ASIN_COEFFS[9]);
- return fputil::multiply_add(xsq, r2, r1);
+ double c0 = fputil::multiply_add(xsq, ASIN_COEFFS[1], ASIN_COEFFS[0]);
+ double c1 = fputil::multiply_add(xsq, ASIN_COEFFS[3], ASIN_COEFFS[2]);
+ double c2 = fputil::multiply_add(xsq, ASIN_COEFFS[5], ASIN_COEFFS[4]);
+ double c3 = fputil::multiply_add(xsq, ASIN_COEFFS[7], ASIN_COEFFS[6]);
+ double c4 = fputil::multiply_add(xsq, ASIN_COEFFS[9], ASIN_COEFFS[8]);
+ double c5 = fputil::multiply_add(xsq, ASIN_COEFFS[11], ASIN_COEFFS[10]);
+ double x8 = x4 * x4;
+ double d0 = fputil::multiply_add(x4, c1, c0);
+ double d1 = fputil::multiply_add(x4, c3, c2);
+ double d2 = fputil::multiply_add(x4, c5, c4);
+ return fputil::polyeval(x8, d0, d1, d2);
}
// the coefficients for the polynomial approximation of asin(x)/(pi*x) in the
``````````
</details>
https://github.com/llvm/llvm-project/pull/187885
More information about the libc-commits
mailing list