[libc-commits] [libc] [libc][math] Improve the performance of sin/cos for small inputs |x| < 2^-4. (PR #201748)
via libc-commits
libc-commits at lists.llvm.org
Thu Jun 4 22:19:18 PDT 2026
https://github.com/lntue created https://github.com/llvm/llvm-project/pull/201748
Use a degree-9 polynomial for fast path for sin(x) with errors bounded by `|x| * 2^-68 + 2* ulp(x^3 / 6)`.
>From d707ffe770f6408c1ac46b7a7b6d41f91a73788e Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue.h at gmail.com>
Date: Fri, 5 Jun 2026 05:14:13 +0000
Subject: [PATCH] [libc][math] Improve the performance of sin/cos for small
inputs |x| < 2^-4.
---
libc/src/__support/math/sin.h | 109 +++++++++++++++++++++++-----------
1 file changed, 75 insertions(+), 34 deletions(-)
diff --git a/libc/src/__support/math/sin.h b/libc/src/__support/math/sin.h
index cd0c15abe62db..046b83404790e 100644
--- a/libc/src/__support/math/sin.h
+++ b/libc/src/__support/math/sin.h
@@ -29,6 +29,44 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
+LIBC_INLINE double
+sin_accurate(double x, uint16_t x_e, unsigned k,
+ const range_reduction_double_internal::LargeRangeReduction
+ &range_reduction_large) {
+ using namespace math::range_reduction_double_internal;
+ using FPBits = typename fputil::FPBits<double>;
+
+ DFloat128 u_f128, sin_u, cos_u;
+ if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT))
+ u_f128 = range_reduction_small_f128(x);
+ else
+ u_f128 = range_reduction_large.accurate();
+
+ math::sincos_eval_internal::sincos_eval(u_f128, sin_u, cos_u);
+
+ auto get_sin_k = [](unsigned kk) -> DFloat128 {
+ unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
+ DFloat128 ans = SIN_K_PI_OVER_128_F128[idx];
+ if (kk & 128)
+ ans.sign = Sign::NEG;
+ return ans;
+ };
+
+ // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
+ DFloat128 sin_k_f128 = get_sin_k(k);
+ DFloat128 cos_k_f128 = get_sin_k(k + 64);
+
+ // sin(x) = sin(k * pi/128 + u)
+ // = sin(u) * cos(k*pi/128) + cos(u) * sin(k*pi/128)
+ DFloat128 r = fputil::quick_add(fputil::quick_mul(sin_k_f128, cos_u),
+ fputil::quick_mul(cos_k_f128, sin_u));
+
+ // TODO: Add assertion if Ziv's accuracy tests fail in debug mode.
+ // https://github.com/llvm/llvm-project/issues/96452.
+
+ return static_cast<double>(r);
+}
+
LIBC_INLINE double sin(double x) {
using namespace math::range_reduction_double_internal;
using FPBits = typename fputil::FPBits<double>;
@@ -43,7 +81,7 @@ LIBC_INLINE double sin(double x) {
// |x| < 2^16
if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) {
// |x| < 2^-7
- if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 7)) {
+ if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 4)) {
// |x| < 2^-26, |sin(x) - x| < ulp(x)/2.
if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 26)) {
// Signed zeros.
@@ -64,9 +102,40 @@ LIBC_INLINE double sin(double x) {
#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
}
// No range reduction needed.
- k = 0;
- y.lo = 0.0;
- y.hi = x;
+
+ // Use degree-9 polynomial approximation:
+ // sin(x) ~ x + a1 * x^3 + a2 * x^5 + a3 * x^7 + a4 * x^9
+ // ~ x + x^3 * Q(x^2).
+ // > P = fpminimax(sin(x)/x, [|0, 2, 4, 6, 8|], [|1, D...|], [0, 2^-4]);
+ // > dirtyinfnorm((sin(x) - x*P)/sin(x), [-2^-4, 2^-4]);
+ // 0x1.3c2e...p-69
+ // > P;
+ constexpr double COEFFS[] = {-0x1.5555555555555p-3, 0x1.111111110f491p-7,
+ -0x1.a01a00e16af3ep-13,
+ 0x1.71c24233f1bafp-19};
+ double x_sq = x * x;
+ double c0 = fputil::multiply_add(x_sq, COEFFS[1], COEFFS[0]);
+ double c1 = fputil::multiply_add(x_sq, COEFFS[3], COEFFS[2]);
+ double x4 = x_sq * x_sq;
+ double x3 = x * x_sq;
+ double r_lo = fputil::multiply_add(x4, c1, c0) * x3;
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ return x + r_lo;
+#else
+ // Overall errors <= 2 * ulp(x^3/6) + |x| * 2^-68.
+ double err = fputil::multiply_add(x_sq, 0x1.0p-53, 0x1.0p-68);
+ double r_lo_u = fputil::multiply_add(x, err, r_lo);
+ double r_lo_l = fputil::multiply_add(-x, err, r_lo);
+ double r_upper = x + r_lo_u;
+ double r_lower = x + r_lo_l;
+
+ if (LIBC_LIKELY(r_upper == r_lower))
+ return r_upper;
+
+ k = range_reduction_small(x, y);
+ return sin_accurate(x, x_e, k, range_reduction_large);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
} else {
// Small range reduction.
k = range_reduction_small(x, y);
@@ -124,7 +193,7 @@ LIBC_INLINE double sin(double x) {
DoubleDouble sin_k_cos_y = fputil::quick_mult(cos_y, sin_k);
DoubleDouble cos_k_sin_y = fputil::quick_mult(sin_y, cos_k);
- DoubleDouble rr = fputil::exact_add<false>(sin_k_cos_y.hi, cos_k_sin_y.hi);
+ DoubleDouble rr = fputil::exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
@@ -142,35 +211,7 @@ LIBC_INLINE double sin(double x) {
if (LIBC_LIKELY(r_upper == r_lower))
return r_upper;
- DFloat128 u_f128, sin_u, cos_u;
- if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT))
- u_f128 = range_reduction_small_f128(x);
- else
- u_f128 = range_reduction_large.accurate();
-
- math::sincos_eval_internal::sincos_eval(u_f128, sin_u, cos_u);
-
- auto get_sin_k = [](unsigned kk) -> DFloat128 {
- unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
- DFloat128 ans = SIN_K_PI_OVER_128_F128[idx];
- if (kk & 128)
- ans.sign = Sign::NEG;
- return ans;
- };
-
- // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
- DFloat128 sin_k_f128 = get_sin_k(k);
- DFloat128 cos_k_f128 = get_sin_k(k + 64);
-
- // sin(x) = sin(k * pi/128 + u)
- // = sin(u) * cos(k*pi/128) + cos(u) * sin(k*pi/128)
- DFloat128 r = fputil::quick_add(fputil::quick_mul(sin_k_f128, cos_u),
- fputil::quick_mul(cos_k_f128, sin_u));
-
- // TODO: Add assertion if Ziv's accuracy tests fail in debug mode.
- // https://github.com/llvm/llvm-project/issues/96452.
-
- return static_cast<double>(r);
+ return sin_accurate(x, x_e, k, range_reduction_large);
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}
More information about the libc-commits
mailing list