[libc-commits] [libc] [libc][math] Improve the performance of sin/cos for small inputs |x| < 2^-4. (PR #201748)
via libc-commits
libc-commits at lists.llvm.org
Fri Jun 5 06:24:06 PDT 2026
https://github.com/lntue updated https://github.com/llvm/llvm-project/pull/201748
>From d707ffe770f6408c1ac46b7a7b6d41f91a73788e Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue.h at gmail.com>
Date: Fri, 5 Jun 2026 05:14:13 +0000
Subject: [PATCH 1/3] [libc][math] Improve the performance of sin/cos for small
inputs |x| < 2^-4.
---
libc/src/__support/math/sin.h | 109 +++++++++++++++++++++++-----------
1 file changed, 75 insertions(+), 34 deletions(-)
diff --git a/libc/src/__support/math/sin.h b/libc/src/__support/math/sin.h
index cd0c15abe62db..046b83404790e 100644
--- a/libc/src/__support/math/sin.h
+++ b/libc/src/__support/math/sin.h
@@ -29,6 +29,44 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
+LIBC_INLINE double
+sin_accurate(double x, uint16_t x_e, unsigned k,
+ const range_reduction_double_internal::LargeRangeReduction
+ &range_reduction_large) {
+ using namespace math::range_reduction_double_internal;
+ using FPBits = typename fputil::FPBits<double>;
+
+ DFloat128 u_f128, sin_u, cos_u;
+ if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT))
+ u_f128 = range_reduction_small_f128(x);
+ else
+ u_f128 = range_reduction_large.accurate();
+
+ math::sincos_eval_internal::sincos_eval(u_f128, sin_u, cos_u);
+
+ auto get_sin_k = [](unsigned kk) -> DFloat128 {
+ unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
+ DFloat128 ans = SIN_K_PI_OVER_128_F128[idx];
+ if (kk & 128)
+ ans.sign = Sign::NEG;
+ return ans;
+ };
+
+ // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
+ DFloat128 sin_k_f128 = get_sin_k(k);
+ DFloat128 cos_k_f128 = get_sin_k(k + 64);
+
+ // sin(x) = sin(k * pi/128 + u)
+ // = sin(u) * cos(k*pi/128) + cos(u) * sin(k*pi/128)
+ DFloat128 r = fputil::quick_add(fputil::quick_mul(sin_k_f128, cos_u),
+ fputil::quick_mul(cos_k_f128, sin_u));
+
+ // TODO: Add assertion if Ziv's accuracy tests fail in debug mode.
+ // https://github.com/llvm/llvm-project/issues/96452.
+
+ return static_cast<double>(r);
+}
+
LIBC_INLINE double sin(double x) {
using namespace math::range_reduction_double_internal;
using FPBits = typename fputil::FPBits<double>;
@@ -43,7 +81,7 @@ LIBC_INLINE double sin(double x) {
// |x| < 2^16
if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) {
// |x| < 2^-7
- if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 7)) {
+ if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 4)) {
// |x| < 2^-26, |sin(x) - x| < ulp(x)/2.
if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 26)) {
// Signed zeros.
@@ -64,9 +102,40 @@ LIBC_INLINE double sin(double x) {
#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
}
// No range reduction needed.
- k = 0;
- y.lo = 0.0;
- y.hi = x;
+
+ // Use degree-9 polynomial approximation:
+ // sin(x) ~ x + a1 * x^3 + a2 * x^5 + a3 * x^7 + a4 * x^9
+ // ~ x + x^3 * Q(x^2).
+ // > P = fpminimax(sin(x)/x, [|0, 2, 4, 6, 8|], [|1, D...|], [0, 2^-4]);
+ // > dirtyinfnorm((sin(x) - x*P)/sin(x), [-2^-4, 2^-4]);
+ // 0x1.3c2e...p-69
+ // > P;
+ constexpr double COEFFS[] = {-0x1.5555555555555p-3, 0x1.111111110f491p-7,
+ -0x1.a01a00e16af3ep-13,
+ 0x1.71c24233f1bafp-19};
+ double x_sq = x * x;
+ double c0 = fputil::multiply_add(x_sq, COEFFS[1], COEFFS[0]);
+ double c1 = fputil::multiply_add(x_sq, COEFFS[3], COEFFS[2]);
+ double x4 = x_sq * x_sq;
+ double x3 = x * x_sq;
+ double r_lo = fputil::multiply_add(x4, c1, c0) * x3;
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ return x + r_lo;
+#else
+ // Overall errors <= 2 * ulp(x^3/6) + |x| * 2^-68.
+ double err = fputil::multiply_add(x_sq, 0x1.0p-53, 0x1.0p-68);
+ double r_lo_u = fputil::multiply_add(x, err, r_lo);
+ double r_lo_l = fputil::multiply_add(-x, err, r_lo);
+ double r_upper = x + r_lo_u;
+ double r_lower = x + r_lo_l;
+
+ if (LIBC_LIKELY(r_upper == r_lower))
+ return r_upper;
+
+ k = range_reduction_small(x, y);
+ return sin_accurate(x, x_e, k, range_reduction_large);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
} else {
// Small range reduction.
k = range_reduction_small(x, y);
@@ -124,7 +193,7 @@ LIBC_INLINE double sin(double x) {
DoubleDouble sin_k_cos_y = fputil::quick_mult(cos_y, sin_k);
DoubleDouble cos_k_sin_y = fputil::quick_mult(sin_y, cos_k);
- DoubleDouble rr = fputil::exact_add<false>(sin_k_cos_y.hi, cos_k_sin_y.hi);
+ DoubleDouble rr = fputil::exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
@@ -142,35 +211,7 @@ LIBC_INLINE double sin(double x) {
if (LIBC_LIKELY(r_upper == r_lower))
return r_upper;
- DFloat128 u_f128, sin_u, cos_u;
- if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT))
- u_f128 = range_reduction_small_f128(x);
- else
- u_f128 = range_reduction_large.accurate();
-
- math::sincos_eval_internal::sincos_eval(u_f128, sin_u, cos_u);
-
- auto get_sin_k = [](unsigned kk) -> DFloat128 {
- unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
- DFloat128 ans = SIN_K_PI_OVER_128_F128[idx];
- if (kk & 128)
- ans.sign = Sign::NEG;
- return ans;
- };
-
- // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
- DFloat128 sin_k_f128 = get_sin_k(k);
- DFloat128 cos_k_f128 = get_sin_k(k + 64);
-
- // sin(x) = sin(k * pi/128 + u)
- // = sin(u) * cos(k*pi/128) + cos(u) * sin(k*pi/128)
- DFloat128 r = fputil::quick_add(fputil::quick_mul(sin_k_f128, cos_u),
- fputil::quick_mul(cos_k_f128, sin_u));
-
- // TODO: Add assertion if Ziv's accuracy tests fail in debug mode.
- // https://github.com/llvm/llvm-project/issues/96452.
-
- return static_cast<double>(r);
+ return sin_accurate(x, x_e, k, range_reduction_large);
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}
>From 6e4ebaa43a7fe01063bdd888d141a5542cd38763 Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue.h at gmail.com>
Date: Fri, 5 Jun 2026 05:29:37 +0000
Subject: [PATCH 2/3] Skip sin_accurate definition when
LIBC_MATH_HAS_SKIP_ACCURATE_PASS is defined.
---
libc/src/__support/math/sin.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/libc/src/__support/math/sin.h b/libc/src/__support/math/sin.h
index 046b83404790e..5b7d9b71b401d 100644
--- a/libc/src/__support/math/sin.h
+++ b/libc/src/__support/math/sin.h
@@ -29,6 +29,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
LIBC_INLINE double
sin_accurate(double x, uint16_t x_e, unsigned k,
const range_reduction_double_internal::LargeRangeReduction
@@ -66,6 +67,7 @@ sin_accurate(double x, uint16_t x_e, unsigned k,
return static_cast<double>(r);
}
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
LIBC_INLINE double sin(double x) {
using namespace math::range_reduction_double_internal;
>From a0359b0a1300241a9fde3fc2ee9339bc4356b175 Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue.h at gmail.com>
Date: Fri, 5 Jun 2026 13:23:24 +0000
Subject: [PATCH 3/3] Improve cos in a similar way.
---
libc/src/__support/math/cos.h | 112 +++++++++++++++++++++++-----------
1 file changed, 77 insertions(+), 35 deletions(-)
diff --git a/libc/src/__support/math/cos.h b/libc/src/__support/math/cos.h
index f968a999aeb3c..ef589ef3a198d 100644
--- a/libc/src/__support/math/cos.h
+++ b/libc/src/__support/math/cos.h
@@ -30,6 +30,47 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+LIBC_INLINE double
+cos_accurate(double x, uint16_t x_e, unsigned k,
+ const range_reduction_double_internal::LargeRangeReduction
+ &range_reduction_large) {
+ using namespace math::range_reduction_double_internal;
+ using FPBits = typename fputil::FPBits<double>;
+
+ DFloat128 u_f128, sin_u, cos_u;
+ if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT))
+ u_f128 = range_reduction_small_f128(x);
+ else
+ u_f128 = range_reduction_large.accurate();
+
+ math::sincos_eval_internal::sincos_eval(u_f128, sin_u, cos_u);
+
+ auto get_sin_k = [](unsigned kk) -> DFloat128 {
+ unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
+ DFloat128 ans = SIN_K_PI_OVER_128_F128[idx];
+ if (kk & 128)
+ ans.sign = Sign::NEG;
+ return ans;
+ };
+
+ // -sin(k * pi/128) = sin((k + 128) * pi/128)
+ // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
+ DFloat128 msin_k_f128 = get_sin_k(k + 128);
+ DFloat128 cos_k_f128 = get_sin_k(k + 64);
+
+ // cos(x) = cos((k * pi/128 + u)
+ // = cos(u) * cos(k*pi/128) - sin(u) * sin(k*pi/128)
+ DFloat128 r = fputil::quick_add(fputil::quick_mul(cos_k_f128, cos_u),
+ fputil::quick_mul(msin_k_f128, sin_u));
+
+ // TODO: Add assertion if Ziv's accuracy tests fail in debug mode.
+ // https://github.com/llvm/llvm-project/issues/96452.
+
+ return static_cast<double>(r);
+}
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
LIBC_INLINE double cos(double x) {
using namespace range_reduction_double_internal;
using FPBits = typename fputil::FPBits<double>;
@@ -43,8 +84,8 @@ LIBC_INLINE double cos(double x) {
// |x| < 2^16.
if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) {
- // |x| < 2^-7
- if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 7)) {
+ // |x| < 2^-4
+ if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 4)) {
// |x| < 2^-27
if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 27)) {
// Signed zeros.
@@ -55,9 +96,39 @@ LIBC_INLINE double cos(double x) {
return fputil::round_result_slightly_down(1.0);
}
// No range reduction needed.
- k = 0;
- y.lo = 0.0;
- y.hi = x;
+
+ // Use degree-8 polynomial approximation:
+ // sin(x) ~ 1 + a1 * x^2 + a2 * x^4 + a3 * x^6 + a4 * x^8
+ // ~ 1 + x^2 * Q(x^2).
+ // > P = fpminimax(cos(x), [|0, 2, 4, 6, 8|], [|1, D...|], [0, 2^-4]);
+ // > dirtyinfnorm(cos(x) - P, [-2^-4, 2^-4]);
+ // 0x1.3cfe...p-70
+ // > P;
+ constexpr double COEFFS[] = {-0x1p-1, 0x1.5555555555262p-5,
+ -0x1.6c16c1508bff1p-10,
+ 0x1.a00ffd769159ap-16};
+ double x_sq = x * x;
+ double c0 = fputil::multiply_add(x_sq, COEFFS[1], COEFFS[0]);
+ double c1 = fputil::multiply_add(x_sq, COEFFS[3], COEFFS[2]);
+ double x4 = x_sq * x_sq;
+ double r_lo = fputil::multiply_add(x4, c1, c0) * x_sq;
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ return x + r_lo;
+#else
+ // Overall errors <= 2 * ulp(x^2/6) + |x| * 2^-69.
+ double err = fputil::multiply_add(x_sq, 0x1.0p-53, 0x1.0p-69);
+ double r_lo_u = r_lo + err;
+ double r_lo_l = r_lo - err;
+ double r_upper = x + r_lo_u;
+ double r_lower = x + r_lo_l;
+
+ if (LIBC_LIKELY(r_upper == r_lower))
+ return r_upper;
+
+ k = range_reduction_small(x, y);
+ return cos_accurate(x, x_e, k, range_reduction_large);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
} else {
// Small range reduction.
k = range_reduction_small(x, y);
@@ -131,36 +202,7 @@ LIBC_INLINE double cos(double x) {
if (LIBC_LIKELY(r_upper == r_lower))
return r_upper;
- DFloat128 u_f128, sin_u, cos_u;
- if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT))
- u_f128 = range_reduction_small_f128(x);
- else
- u_f128 = range_reduction_large.accurate();
-
- math::sincos_eval_internal::sincos_eval(u_f128, sin_u, cos_u);
-
- auto get_sin_k = [](unsigned kk) -> DFloat128 {
- unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63);
- DFloat128 ans = SIN_K_PI_OVER_128_F128[idx];
- if (kk & 128)
- ans.sign = Sign::NEG;
- return ans;
- };
-
- // -sin(k * pi/128) = sin((k + 128) * pi/128)
- // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128).
- DFloat128 msin_k_f128 = get_sin_k(k + 128);
- DFloat128 cos_k_f128 = get_sin_k(k + 64);
-
- // cos(x) = cos((k * pi/128 + u)
- // = cos(u) * cos(k*pi/128) - sin(u) * sin(k*pi/128)
- DFloat128 r = fputil::quick_add(fputil::quick_mul(cos_k_f128, cos_u),
- fputil::quick_mul(msin_k_f128, sin_u));
-
- // TODO: Add assertion if Ziv's accuracy tests fail in debug mode.
- // https://github.com/llvm/llvm-project/issues/96452.
-
- return static_cast<double>(r);
+ return cos_accurate(x, x_e, k, range_reduction_large);
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}
More information about the libc-commits
mailing list