[libc-commits] [libc] [libc][math] Implement double precision asin correctly rounded for all rounding modes. (PR #134401)
via libc-commits
libc-commits at lists.llvm.org
Fri Apr 4 08:39:22 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
Author: None (lntue)
<details>
<summary>Changes</summary>
Main algorithm:
The Taylor series expansion of `asin(x)` is:
```math
\begin{align*}
asin(x) &= x + x^3 / 6 + 3x^5 / 40 + ... \\
&= x \cdot P(x^2) \\
&= x \cdot P(u) &\text{, where } u = x^2.
\end{align*}
```
For the fast path, we perform range reduction mod 1/64 and use degree-7 (minimax + Taylor) polynomials to approximate `P(x^2)`.
When `|x| >= 0.5`, we use the transformation:
```math
u = \frac{1 + x}{2}
```
and apply half-angle formula to reduce `asin(x)` to:
```math
\begin{align*}
asin(x) &= sign(x) \cdot \left( \frac{\pi}{2} - 2 \cdot asin(\sqrt{u}) \right) \\
&= sign(x) \cdot \left( \frac{\pi}{2} - 2 \cdot \sqrt{u} \cdot P(u) \right).
\end{align*}
```
Since `0.5 <= |x| <= 1`, `|u| <= 0.5`. So we can reuse the polynomial evaluation of `P(u)` when `|x| < 0.5`.
For the accurate path, we redo the computations in 128-bit precision with degree-15 (minimax + Taylor) polynomials to approximate `P(u)`.
---
Patch is 50.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134401.diff
14 Files Affected:
- (modified) libc/config/darwin/arm/entrypoints.txt (+1)
- (modified) libc/config/linux/aarch64/entrypoints.txt (+1)
- (modified) libc/config/linux/arm/entrypoints.txt (+1)
- (modified) libc/config/linux/riscv/entrypoints.txt (+1)
- (modified) libc/config/linux/x86_64/entrypoints.txt (+1)
- (modified) libc/config/windows/entrypoints.txt (+1)
- (modified) libc/docs/headers/math/index.rst (+1-1)
- (modified) libc/src/math/generic/CMakeLists.txt (+29)
- (added) libc/src/math/generic/asin.cpp (+257)
- (added) libc/src/math/generic/asin_utils.h (+535)
- (modified) libc/test/src/math/CMakeLists.txt (+11)
- (added) libc/test/src/math/asin_test.cpp (+84)
- (modified) libc/test/src/math/smoke/CMakeLists.txt (+11)
- (added) libc/test/src/math/smoke/asin_test.cpp (+48)
``````````diff
diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt
index 38e585e43c776..17e51b20c52c6 100644
--- a/libc/config/darwin/arm/entrypoints.txt
+++ b/libc/config/darwin/arm/entrypoints.txt
@@ -123,6 +123,7 @@ set(TARGET_LIBM_ENTRYPOINTS
# math.h entrypoints
libc.src.math.acosf
libc.src.math.acoshf
+ libc.src.math.asin
libc.src.math.asinf
libc.src.math.asinhf
libc.src.math.atan2
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 5f293dc1c3c73..09012d7ccedc8 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -408,6 +408,7 @@ set(TARGET_LIBM_ENTRYPOINTS
# math.h entrypoints
libc.src.math.acosf
libc.src.math.acoshf
+ libc.src.math.asin
libc.src.math.asinf
libc.src.math.asinhf
libc.src.math.atan2
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index 05e8e9e308168..4d8a74f869c04 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -241,6 +241,7 @@ set(TARGET_LIBM_ENTRYPOINTS
# math.h entrypoints
libc.src.math.acosf
libc.src.math.acoshf
+ libc.src.math.asin
libc.src.math.asinf
libc.src.math.asinhf
libc.src.math.atan2
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index e3c4fe5170104..56c316260b95d 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -398,6 +398,7 @@ set(TARGET_LIBM_ENTRYPOINTS
# math.h entrypoints
libc.src.math.acosf
libc.src.math.acoshf
+ libc.src.math.asin
libc.src.math.asinf
libc.src.math.asinhf
libc.src.math.atan2
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 1ac3a781d5279..1f9fdd1d4becb 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -413,6 +413,7 @@ set(TARGET_LIBM_ENTRYPOINTS
# math.h entrypoints
libc.src.math.acosf
libc.src.math.acoshf
+ libc.src.math.asin
libc.src.math.asinf
libc.src.math.asinhf
libc.src.math.atan2
diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt
index 8ec5760bef84e..37fa888d6498a 100644
--- a/libc/config/windows/entrypoints.txt
+++ b/libc/config/windows/entrypoints.txt
@@ -129,6 +129,7 @@ set(TARGET_LIBM_ENTRYPOINTS
# math.h entrypoints
libc.src.math.acosf
libc.src.math.acoshf
+ libc.src.math.asin
libc.src.math.asinf
libc.src.math.asinhf
libc.src.math.atan2
diff --git a/libc/docs/headers/math/index.rst b/libc/docs/headers/math/index.rst
index 947bd4b60b391..38dffa729d857 100644
--- a/libc/docs/headers/math/index.rst
+++ b/libc/docs/headers/math/index.rst
@@ -255,7 +255,7 @@ Higher Math Functions
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| acospi | | | | | | 7.12.4.8 | F.10.1.8 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| asin | |check| | | | |check| | | 7.12.4.2 | F.10.1.2 |
+| asin | |check| | |check| | | | | 7.12.4.2 | F.10.1.2 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| asinh | |check| | | | |check| | | 7.12.5.2 | F.10.2.2 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index f7c36aab77b7d..8332124484372 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -4076,6 +4076,35 @@ add_entrypoint_object(
libc.src.__support.macros.properties.types
)
+add_header_library(
+ asin_utils
+ HDRS
+ atan_utils.h
+ DEPENDS
+ libc.src.__support.integer_literals
+ libc.src.__support.FPUtil.double_double
+ libc.src.__support.FPUtil.dyadic_float
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.nearest_integer
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.macros.optimization
+)
+
+add_entrypoint_object(
+ asin
+ SRCS
+ asin.cpp
+ HDRS
+ ../asin.h
+ DEPENDS
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.FPUtil.sqrt
+ libc.src.__support.macros.optimization
+ .inv_trigf_utils
+)
+
add_entrypoint_object(
acosf
SRCS
diff --git a/libc/src/math/generic/asin.cpp b/libc/src/math/generic/asin.cpp
new file mode 100644
index 0000000000000..b1c50126df2e4
--- /dev/null
+++ b/libc/src/math/generic/asin.cpp
@@ -0,0 +1,257 @@
+//===-- Double-precision asin function ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/asin.h"
+#include "asin_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
+
+namespace LIBC_NAMESPACE_DECL {
+
+using DoubleDouble = fputil::DoubleDouble;
+using Float128 = fputil::DyadicFloat<128>;
+
+LLVM_LIBC_FUNCTION(double, asin, (double x)) {
+ using FPBits = typename fputil::FPBits<double>;
+
+ FPBits xbits(x);
+ int x_exp = xbits.get_biased_exponent();
+
+ // |x| < 0.5.
+ if (x_exp < FPBits::EXP_BIAS - 1) {
+ // |x| < 2^-26.
+ if (LIBC_UNLIKELY(x_exp < FPBits::EXP_BIAS - 26)) {
+ // When |x| < 2^-26, the relative error of the approximation asin(x) ~ x
+ // is:
+ // |asin(x) - x| / |asin(x)| < |x^3| / (6|x|)
+ // = x^2 / 6
+ // < 2^-54
+ // < epsilon(1)/2.
+ // So the correctly rounded values of asin(x) are:
+ // = x + sign(x)*eps(x) if rounding mode = FE_TOWARDZERO,
+ // or (rounding mode = FE_UPWARD and x is
+ // negative),
+ // = x otherwise.
+ // To simplify the rounding decision and make it more efficient, we use
+ // fma(x, 2^-54, x) instead.
+ // Note: to use the formula x + 2^-54*x to decide the correct rounding, we
+ // do need fma(x, 2^-54, x) to prevent underflow caused by 2^-54*x when
+ // |x| < 2^-1022. For targets without FMA instructions, when x is close to
+ // denormal range, we normalize x,
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS)
+ return x;
+#elif defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE)
+ return fputil::multiply_add(x, 0x1.0p-54, x);
+#else
+ if (x == 0.0)
+ return x;
+ // Get sign(x) * min_normal.
+ FPBits eps_bits = FPBits::min_normal();
+ eps_bits.set_sign(xbits.sign());
+ double eps = eps_bits.get_val();
+ double normalize_const = (x_exp == 0) ? eps : 0.0;
+ double scaled_normal =
+ fputil::multiply_add(x + normalize_const, 0x1.0p54, eps);
+ return fputil::multiply_add(scaled_normal, 0x1.0p-54, -normalize_const);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ }
+
+ unsigned idx;
+ DoubleDouble x_sq = fputil::exact_mult(x, x);
+ double err = x * 0x1.0p-52;
+ // Polynomial approximation:
+ // p ~ asin(x)/x - ASIN_COEFFS[idx][0]
+ double p = asin_eval(x_sq, idx, err);
+ // asin(x) ~ x * (ASIN_COEFFS[idx][0] + p)
+ DoubleDouble r0 = fputil::exact_mult(x, ASIN_COEFFS[idx][0]);
+ double r_lo = fputil::multiply_add(x, p, r0.lo);
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ return r0.hi + r_lo;
+#else
+ // Ziv's accuracy test.
+
+ double r_upper = r0.hi + (r_lo + err);
+ double r_lower = r0.hi + (r_lo - err);
+
+ if (LIBC_LIKELY(r_upper == r_lower))
+ return r_upper;
+
+ // Ziv's accuracy test failed, perform 128-bit calculation.
+
+ // Get x^2 - idx/64 exactly. When FMA is available, double-double
+ // multiplication will be correct for all rounding modes. Otherwise we use
+ // Float128 directly.
+ Float128 x_f128(x);
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+ // u = x^2 - idx/64
+ Float128 u_hi(
+ fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, x_sq.hi));
+ Float128 u = fputil::quick_add(u_hi, Float128(x_sq.lo));
+#else
+ Float128 x_sq_f128 = fputil::quick_mul(x_f128, x_f128);
+ Float128 u = fputil::quick_add(
+ x_sq_f128, Float128(static_cast<double>(idx) * (-0x1.0p-6)));
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
+ Float128 p_f128 = asin_eval(u, idx);
+ Float128 r = fputil::quick_mul(x_f128, p_f128);
+
+ return static_cast<double>(r);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ }
+ // |x| >= 0.5
+
+ double x_abs = xbits.abs().get_val();
+
+ // |x| >= 1
+ if (LIBC_UNLIKELY(x_exp >= FPBits::EXP_BIAS)) {
+ // x = +-1, asin(x) = +- pi/2
+ if (x_abs == 1.0) {
+ // return +- pi/2
+ }
+ // |x| > 1, return NaN.
+ if (xbits.is_finite()) {
+ fputil::set_errno_if_required(EDOM);
+ fputil::raise_except_if_required(FE_INVALID);
+ }
+ return FPBits::quiet_nan().get_val();
+ }
+
+ // When |x| >= 0.5, we perform range reduction as follow:
+ //
+ // Assume further that 0.5 < x <= 1, and let:
+ // y = asin(x)
+ // We will use the double angle formula:
+ // cos(2y) = 1 - 2 sin^2(y)
+ // and the complement angle identity:
+ // x = sin(y) = cos(pi/2 - y)
+ // = 1 - 2 sin^2 (pi/4 - y/2)
+ // So:
+ // sin(pi/4 - y/2) = sqrt( (1 - x)/2 )
+ // And hence:
+ // pi/4 - y/2 = asin( sqrt( (1 - x)/2 ) )
+ // Equivalently:
+ // asin(x) = y = pi/2 - 2 * asin( sqrt( (1 - x)/2 ) )
+ // Let u = (1 - x)/2, then:
+ // asin(x) = pi/2 - 2 * asin( sqrt(u) )
+ // Moreover, since 0.5 < x <= 1:
+ // 0 <= u < 1/4, and 0 <= sqrt(u) < 0.5,
+ // And hence we can reuse the same polynomial approximation of asin(x) when
+ // |x| <= 0.5:
+ // asin(x) ~ pi/2 - 2 * sqrt(u) * P(u),
+
+ // u = (1 - |x|)/2
+ double u = fputil::multiply_add(x_abs, -0.5, 0.5);
+ // v_hi + v_lo ~ sqrt(u).
+ // Let:
+ // h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi)
+ // Then:
+ // sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
+ // ~ v_hi + h / (2 * v_hi)
+ // So we can use:
+ // v_lo = h / (2 * v_hi).
+ // Then,
+ // asin(x) ~ pi/2 - 2*(v_hi + v_lo) * P(u)
+ double v_hi = fputil::sqrt<double>(u);
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+ double h = fputil::multiply_add(v_hi, -v_hi, u);
+#else
+ DoubleDouble v_hi_sq = fputil::exact_mult(v_hi, v_hi);
+ double h = (u - v_hi_sq.hi) - v_hi_sq.lo;
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
+ // Scale v_lo and v_hi by 2 from the formula:
+ // vh = v_hi * 2
+ // vl = 2*v_lo = h / v_hi.
+ double vh = v_hi * 2.0;
+ double vl = h / v_hi;
+
+ // Polynomial approximation:
+ // p ~ asin(sqrt(u))/sqrt(u) - ASIN_COEFFS[idx][0]
+ unsigned idx;
+ [[maybe_unused]] double err = vh * 0x1.0p-52;
+ double p = asin_eval(DoubleDouble{0.0, u}, idx, err);
+
+ // Perform computations in double-double arithmetic:
+ // asin(x) = pi/2 - (v_hi + v_lo) * (ASIN_COEFFS[idx][0] + p)
+ DoubleDouble r0 = fputil::exact_mult(vh, ASIN_COEFFS[idx][0]);
+ DoubleDouble r = fputil::exact_add(PI_OVER_TWO.hi, -r0.hi);
+
+ // Combining all the lower terms.
+ double lo = r.lo - fputil::multiply_add(vl, ASIN_COEFFS[idx][0],
+ r0.lo - PI_OVER_TWO.lo);
+ double r_lo = fputil::multiply_add(vh, -p, lo);
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+#else
+ // Ziv's accuracy test.
+
+ double r_upper = r.hi + (r_lo + err);
+ double r_lower = r.hi + (r_lo - err);
+
+ if (LIBC_LIKELY(r_upper == r_lower))
+ return r_upper;
+
+ // Ziv's accuracy test failed, we redo the computations in Float128.
+
+ // After the first step of Newton-Raphson approximating v = sqrt(u), we have
+ // that:
+ // sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
+ // v_lo = h / (2 * v_hi)
+ // With error:
+ // sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) )
+ // = -h^2 / (2*v * (sqrt(u) + v)^2).
+ // Since:
+ // (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u,
+ // we can add another correction term to (v_hi + v_lo) that is:
+ // v_ll = -h^2 / (2*v_hi * 4u)
+ // = -v_lo * (h / 4u)
+ // = -vl * (h / 8u),
+ // making the errors:
+ // sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3)
+ // well beyond 128-bit precision needed.
+
+ // Get the rounding error of vl = 2 * v_lo ~ h / vh
+ // Get full product of vh * vl
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+ double vl_lo = fputil::multiply_add(-v_hi, vl, h) / v_hi;
+#else
+ DoubleDouble vh_vl = fputil::exact_mult(v_hi, vl);
+ double vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi;
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+ // vll = 2*v_ll = -vl * (h / (4u)).
+ double t = h * (-0.25) / u;
+ double vll = fputil::multiply_add(vl, t, vl_lo);
+ // m_v = -(v_hi + v_lo + v_ll).
+ Float128 m_v = fputil::quick_add(
+ Float128(vh), fputil::quick_add(Float128(vl), Float128(vll)));
+ m_v.sign = Sign::NEG;
+
+ // Perform computations in Float128:
+ // asin(x) = pi/2 - (v_hi + v_lo + vll) * P(u).
+ Float128 y_f128(fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, u));
+
+ Float128 p_f128 = asin_eval(y_f128, idx);
+ Float128 r0_f128 = fputil::quick_mul(m_v, p_f128);
+ Float128 r_f128 = fputil::quick_add(PI_OVER_TWO_F128, r0_f128);
+
+ return static_cast<double>(r_f128);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/asin_utils.h b/libc/src/math/generic/asin_utils.h
new file mode 100644
index 0000000000000..547deb6dd5570
--- /dev/null
+++ b/libc/src/math/generic/asin_utils.h
@@ -0,0 +1,535 @@
+//===-- Collection of utils for asin/acos -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GENERIC_ASIN_UTILS_H
+#define LLVM_LIBC_SRC_MATH_GENERIC_ASIN_UTILS_H
+
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/integer_literals.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace {
+
+using DoubleDouble = fputil::DoubleDouble;
+using Float128 = fputil::DyadicFloat<128>;
+
+constexpr DoubleDouble PI_OVER_TWO = {0x1.1a62633145c07p-54,
+ 0x1.921fb54442d18p0};
+
+// The Taylor expansion of asin(x) around 0 is:
+// asin(x) = x + x^3/6 + 3x^5/40 + ...
+// ~ x * P(x^2).
+// Let u = x^2, then P(x^2) = P(u), and |x| = sqrt(u). Note that when
+// |x| <= 0.5, we have |u| <= 0.25.
+// We approximate P(u) by breaking it down by performing range reduction mod
+// 2^-6 = 1/64.
+// So for:
+// k = round(u * 64),
+// y = u - k/64,
+// we have that:
+// x = sqrt(u) = sqrt(k/64 + y),
+// |y| <= 2^-6 = 1/64,
+// and:
+// P(u) = P(k/64 + y) = Q_k(y).
+// Hence :
+// asin(x) = sqrt(k/64 + y) * Q_k(y),
+// Or equivalently:
+// Q_k(y) = asin(sqrt(k/64 + y)) / sqrt(k/64 + y).
+// We generate the coefficients of Q_k by Sollya as following:
+// > procedure ASIN_APPROX(N, Deg) {
+// abs_error = 0;
+// rel_error = 0;
+// for i from 1 to N/4 do {
+// Q = fpminimax(asin(sqrt(i/N + x))/sqrt(i/N + x), Deg, [|DD, D...|],
+// [-1/(2*N), 1/(2*N)]);
+// abs_err = dirtyinfnorm(asin(sqrt(i/N + x)) - sqrt(i/N + x) * Q,
+// [-1/(2*N), 1/(2*N)]);
+// rel_err = dirtyinfnorm(asin(sqrt(i/N + x))/sqrt(i/N + x) - Q,
+// [-1/(2*N), 1/(2*N)]);
+// if (abs_err > abs_error) then abs_error = abs_err;
+// if (rel_err > rel_error) then rel_error = rel_err;
+// d0 = D(coeff(Q, 0));
+// d1 = coeff(Q, 0) - d0;
+// write("{", d0, ", ", d1);
+// for j from 1 to Deg do {
+// write(", ", coeff(Q, j));
+// };
+// print("},");
+// };
+// print("Absolute Errors:", abs_error);
+// print("Relative Errors:", rel_error);
+// };
+// > ASIN_APPROX(64, 7);
+// ...
+// Absolute Errors: 0x1.dc9...p-67
+// Relative Errors: 0x1.da2...p-66
+//
+// For k = 0, we use the degree-14 Taylor polynomial of asin(x)/x:
+//
+// > P = 1 + x^2 * D(1/6) + x^4 * D(3/40) + x^6 * D(5/112) + x^8 * D(35/1152) +
+// x^10 * D(63/2816) + x^12 * D(231/13312) + x^14 * D(143/10240);
+// > dirtyinfnorm(asin(x)/x - P, [-1/128, 1/128]);
+// 0x1.555...p-71
+constexpr double ASIN_COEFFS[17][9] = {
+ {1.0, 0.0, 0x1.5555555555555p-3, 0x1.3333333333333p-4, 0x1.6db6db6db6db7p-5,
+ 0x1.f1c71c71c71c7p-6, 0x1.6e8ba2e8ba2e9p-6, 0x1.1c4ec4ec4ec4fp-6,
+ 0x1.c99999999999ap-7},
+ {0x1.00abe0c129e1ep0, 0x1.7cdde330a0e08p-57, 0x1.5a3385d5c7ba5p-3,
+ 0x1.3bf51056f666bp-4, 0x1.7dba76b165c55p-5, 0x1.07be4afb45142p-5,
+ 0x1.8a6a242c27adbp-6, 0x1.36b49e664eb74p-6, 0x1.f1ac022c7a725p-7},
+ {0x1.015a397cf0f1cp0, -0x1.eec12f4a0e23ap-55, 0x1.5f3581be7b08bp-3,
+ 0x1.4519ddf1ae56cp-4, 0x1.8eb4b6ee90a1cp-5, 0x1.17bc8538a6a91p-5,
+ 0x1.a8e3b76a81de9p-6, 0x1.540067751f362p-6, 0x1.16aa1460b24d5p-6},
+ {0x1.020b1c7df0575p0, -0x1.dd58bfb09878p-55, 0x1.645ce0ab901bap-3,
+ 0x1.4ea79c34fc7e8p-4, 0x1.a0b8ac0945946p-5, 0x1.28f9bab33ecacp-5,
+ 0x1.ca42e489eb27ep-6, 0x1.7498cf62245cep-6, 0x1.3ecec5d85730ap-6},
+ {0x1.02be9ce0b87cdp0, 0x1.e5c6ec8c73cdcp-56, 0x1.69ab5325bc359p-3,
+ 0x1.58a4c3097aaffp-4, 0x1.b3db3606681b5p-5, 0x1.3b94820c270b7p-5,
+ 0x1.eedca27fefeebp-6, 0x1.98ed0a672ba3dp-6, 0x1.5a7ba92d7c7c1p-6},
+ {0x1.0374cea0c0c9fp0, -0x1.917ebfad78ac3p-54, 0x1.6f22a497b2ecp-3,
+ 0x1.63184d8a79e0bp-4, 0x1.c83339cb8a93bp-5, 0x1.4faef324635b9p-5,
+ 0x1.0b87cb9dda2aap-5, 0x1.c17d18cbaf4dcp-6, 0x1.84fd3f338eb99p-6},
+ {0x1.042dc6a65ffbfp0, -0x1.c7f0715ac0a44p-55, 0x1.74c4bd7412f9dp-3,
+ 0x1.6e09c6d2b731ep-4, 0x1.ddd9dcdaf4745p-5, 0x1.656f1f5455d0cp-5,
+ 0x1.21a427af0979bp-5, 0x1.eedcba062ae41p-6, 0x1.b87984ee94704p-6},
+ {0x1.04e99ad5e4bcdp0, -0x1.e97e02ea4515ep-54, 0x1.7a93a5917200bp-3,
+ 0x1.7981584731c74p-4, 0x1.f4eac9278d167p-5, 0x1.7cff9c2ab9587p-5,
+ 0x1.3a011aba1743dp-5, 0x1.10db6a3cd6ec6p-5, 0x1.f07578c65197dp-6},
+ {0x1.05a8621feb16bp0, -0x1.e5c3a30dcee94p-56, 0x1.809186c2e57ddp-3,
+ 0x1.8587d99442e48p-4, 0x1.06c23d1e73eacp-4, 0x1.969023f0a9dc4p-5,
+ 0x1.54e4fab6ced65p-5, 0x1.2d68d296bb8fap-5, 0x1.147275ba8ee51p-5},
+ {0x1.066a34930ec8dp0, -0x1.4813f47576a3bp-54, 0x1.86c0afb447a74p-3,
+ 0x1.9226e29948e2ep-4, 0x...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/134401
More information about the libc-commits
mailing list