[libc-commits] [libc] [libc][math] Implement double precision asin correctly rounded for all rounding modes. (PR #134401)

via libc-commits libc-commits at lists.llvm.org
Fri Apr 4 08:39:22 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libc

Author: None (lntue)

<details>
<summary>Changes</summary>

Main algorithm:

The Taylor series expansion of `asin(x)` is:
```math
\begin{align*}
  asin(x) &= x + x^3 / 6 + 3x^5 / 40 + ... \\
       &= x \cdot P(x^2) \\
       &= x \cdot P(u) &\text{, where } u = x^2.
\end{align*}
```
For the fast path, we perform range reduction mod 1/64 and use degree-7 (minimax + Taylor) polynomials to approximate `P(x^2)`.

When `|x| >= 0.5`, we use the transformation:
```math
  u = \frac{1 + x}{2}
```
and apply half-angle formula to reduce `asin(x)` to:
```math
\begin{align*}
  asin(x) &= sign(x) \cdot \left( \frac{\pi}{2} - 2 \cdot asin(\sqrt{u}) \right) \\
       &= sign(x) \cdot \left( \frac{\pi}{2} - 2 \cdot \sqrt{u} \cdot P(u) \right).
\end{align*}
```
Since `0.5 <= |x| <= 1`, `|u| <= 0.5`.  So we can reuse the polynomial evaluation of `P(u)` when `|x| < 0.5`.

For the accurate path, we redo the computations in 128-bit precision with degree-15 (minimax + Taylor) polynomials to approximate `P(u)`.


---

Patch is 50.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134401.diff


14 Files Affected:

- (modified) libc/config/darwin/arm/entrypoints.txt (+1) 
- (modified) libc/config/linux/aarch64/entrypoints.txt (+1) 
- (modified) libc/config/linux/arm/entrypoints.txt (+1) 
- (modified) libc/config/linux/riscv/entrypoints.txt (+1) 
- (modified) libc/config/linux/x86_64/entrypoints.txt (+1) 
- (modified) libc/config/windows/entrypoints.txt (+1) 
- (modified) libc/docs/headers/math/index.rst (+1-1) 
- (modified) libc/src/math/generic/CMakeLists.txt (+29) 
- (added) libc/src/math/generic/asin.cpp (+257) 
- (added) libc/src/math/generic/asin_utils.h (+535) 
- (modified) libc/test/src/math/CMakeLists.txt (+11) 
- (added) libc/test/src/math/asin_test.cpp (+84) 
- (modified) libc/test/src/math/smoke/CMakeLists.txt (+11) 
- (added) libc/test/src/math/smoke/asin_test.cpp (+48) 


``````````diff
diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt
index 38e585e43c776..17e51b20c52c6 100644
--- a/libc/config/darwin/arm/entrypoints.txt
+++ b/libc/config/darwin/arm/entrypoints.txt
@@ -123,6 +123,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     # math.h entrypoints
     libc.src.math.acosf
     libc.src.math.acoshf
+    libc.src.math.asin
     libc.src.math.asinf
     libc.src.math.asinhf
     libc.src.math.atan2
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 5f293dc1c3c73..09012d7ccedc8 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -408,6 +408,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     # math.h entrypoints
     libc.src.math.acosf
     libc.src.math.acoshf
+    libc.src.math.asin
     libc.src.math.asinf
     libc.src.math.asinhf
     libc.src.math.atan2
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index 05e8e9e308168..4d8a74f869c04 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -241,6 +241,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     # math.h entrypoints
     libc.src.math.acosf
     libc.src.math.acoshf
+    libc.src.math.asin
     libc.src.math.asinf
     libc.src.math.asinhf
     libc.src.math.atan2
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index e3c4fe5170104..56c316260b95d 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -398,6 +398,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     # math.h entrypoints
     libc.src.math.acosf
     libc.src.math.acoshf
+    libc.src.math.asin
     libc.src.math.asinf
     libc.src.math.asinhf
     libc.src.math.atan2
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 1ac3a781d5279..1f9fdd1d4becb 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -413,6 +413,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     # math.h entrypoints
     libc.src.math.acosf
     libc.src.math.acoshf
+    libc.src.math.asin
     libc.src.math.asinf
     libc.src.math.asinhf
     libc.src.math.atan2
diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt
index 8ec5760bef84e..37fa888d6498a 100644
--- a/libc/config/windows/entrypoints.txt
+++ b/libc/config/windows/entrypoints.txt
@@ -129,6 +129,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     # math.h entrypoints
     libc.src.math.acosf
     libc.src.math.acoshf
+    libc.src.math.asin
     libc.src.math.asinf
     libc.src.math.asinhf
     libc.src.math.atan2
diff --git a/libc/docs/headers/math/index.rst b/libc/docs/headers/math/index.rst
index 947bd4b60b391..38dffa729d857 100644
--- a/libc/docs/headers/math/index.rst
+++ b/libc/docs/headers/math/index.rst
@@ -255,7 +255,7 @@ Higher Math Functions
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | acospi    |                  |                 |                        |                      |                        | 7.12.4.8               | F.10.1.8                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| asin      | |check|          |                 |                        | |check|              |                        | 7.12.4.2               | F.10.1.2                   |
+| asin      | |check|          | |check|         |                        |                      |                        | 7.12.4.2               | F.10.1.2                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | asinh     | |check|          |                 |                        | |check|              |                        | 7.12.5.2               | F.10.2.2                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index f7c36aab77b7d..8332124484372 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -4076,6 +4076,35 @@ add_entrypoint_object(
     libc.src.__support.macros.properties.types  
 )
 
+add_header_library(
+  asin_utils
+  HDRS
+    atan_utils.h
+  DEPENDS
+    libc.src.__support.integer_literals
+    libc.src.__support.FPUtil.double_double
+    libc.src.__support.FPUtil.dyadic_float
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.nearest_integer
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.macros.optimization
+)
+
+add_entrypoint_object(
+  asin
+  SRCS
+    asin.cpp
+  HDRS
+    ../asin.h
+  DEPENDS
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.sqrt
+    libc.src.__support.macros.optimization
+    .inv_trigf_utils
+)
+
 add_entrypoint_object(
   acosf
   SRCS
diff --git a/libc/src/math/generic/asin.cpp b/libc/src/math/generic/asin.cpp
new file mode 100644
index 0000000000000..b1c50126df2e4
--- /dev/null
+++ b/libc/src/math/generic/asin.cpp
@@ -0,0 +1,257 @@
+//===-- Double-precision asin function ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/asin.h"
+#include "asin_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY
+#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
+
+namespace LIBC_NAMESPACE_DECL {
+
+using DoubleDouble = fputil::DoubleDouble;
+using Float128 = fputil::DyadicFloat<128>;
+
+LLVM_LIBC_FUNCTION(double, asin, (double x)) {
+  using FPBits = typename fputil::FPBits<double>;
+
+  FPBits xbits(x);
+  int x_exp = xbits.get_biased_exponent();
+
+  // |x| < 0.5.
+  if (x_exp < FPBits::EXP_BIAS - 1) {
+    // |x| < 2^-26.
+    if (LIBC_UNLIKELY(x_exp < FPBits::EXP_BIAS - 26)) {
+      // When |x| < 2^-26, the relative error of the approximation asin(x) ~ x
+      // is:
+      //   |asin(x) - x| / |asin(x)| < |x^3| / (6|x|)
+      //                             = x^2 / 6
+      //                             < 2^-54
+      //                             < epsilon(1)/2.
+      // So the correctly rounded values of asin(x) are:
+      //   = x + sign(x)*eps(x) if rounding mode = FE_TOWARDZERO,
+      //                        or (rounding mode = FE_UPWARD and x is
+      //                        negative),
+      //   = x otherwise.
+      // To simplify the rounding decision and make it more efficient, we use
+      //   fma(x, 2^-54, x) instead.
+      // Note: to use the formula x + 2^-54*x to decide the correct rounding, we
+      // do need fma(x, 2^-54, x) to prevent underflow caused by 2^-54*x when
+      // |x| < 2^-1022. For targets without FMA instructions, when x is close to
+      // denormal range, we normalize x,
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS)
+      return x;
+#elif defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE)
+      return fputil::multiply_add(x, 0x1.0p-54, x);
+#else
+      if (x == 0.0)
+        return x;
+      // Get sign(x) * min_normal.
+      FPBits eps_bits = FPBits::min_normal();
+      eps_bits.set_sign(xbits.sign());
+      double eps = eps_bits.get_val();
+      double normalize_const = (x_exp == 0) ? eps : 0.0;
+      double scaled_normal =
+          fputil::multiply_add(x + normalize_const, 0x1.0p54, eps);
+      return fputil::multiply_add(scaled_normal, 0x1.0p-54, -normalize_const);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+    }
+
+    unsigned idx;
+    DoubleDouble x_sq = fputil::exact_mult(x, x);
+    double err = x * 0x1.0p-52;
+    // Polynomial approximation:
+    //   p ~ asin(x)/x - ASIN_COEFFS[idx][0]
+    double p = asin_eval(x_sq, idx, err);
+    // asin(x) ~ x * (ASIN_COEFFS[idx][0] + p)
+    DoubleDouble r0 = fputil::exact_mult(x, ASIN_COEFFS[idx][0]);
+    double r_lo = fputil::multiply_add(x, p, r0.lo);
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+    return r0.hi + r_lo;
+#else
+    // Ziv's accuracy test.
+
+    double r_upper = r0.hi + (r_lo + err);
+    double r_lower = r0.hi + (r_lo - err);
+
+    if (LIBC_LIKELY(r_upper == r_lower))
+      return r_upper;
+
+    // Ziv's accuracy test failed, perform 128-bit calculation.
+
+    // Get x^2 - idx/64 exactly.  When FMA is available, double-double
+    // multiplication will be correct for all rounding modes.  Otherwise we use
+    // Float128 directly.
+    Float128 x_f128(x);
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+    // u = x^2 - idx/64
+    Float128 u_hi(
+        fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, x_sq.hi));
+    Float128 u = fputil::quick_add(u_hi, Float128(x_sq.lo));
+#else
+    Float128 x_sq_f128 = fputil::quick_mul(x_f128, x_f128);
+    Float128 u = fputil::quick_add(
+        x_sq_f128, Float128(static_cast<double>(idx) * (-0x1.0p-6)));
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
+    Float128 p_f128 = asin_eval(u, idx);
+    Float128 r = fputil::quick_mul(x_f128, p_f128);
+
+    return static_cast<double>(r);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  }
+  // |x| >= 0.5
+
+  double x_abs = xbits.abs().get_val();
+
+  // |x| >= 1
+  if (LIBC_UNLIKELY(x_exp >= FPBits::EXP_BIAS)) {
+    // x = +-1, asin(x) = +- pi/2
+    if (x_abs == 1.0) {
+      // return +- pi/2
+    }
+    // |x| > 1, return NaN.
+    if (xbits.is_finite()) {
+      fputil::set_errno_if_required(EDOM);
+      fputil::raise_except_if_required(FE_INVALID);
+    }
+    return FPBits::quiet_nan().get_val();
+  }
+
+  // When |x| >= 0.5, we perform range reduction as follow:
+  //
+  // Assume further that 0.5 < x <= 1, and let:
+  //   y = asin(x)
+  // We will use the double angle formula:
+  //   cos(2y) = 1 - 2 sin^2(y)
+  // and the complement angle identity:
+  //   x = sin(y) = cos(pi/2 - y)
+  //              = 1 - 2 sin^2 (pi/4 - y/2)
+  // So:
+  //   sin(pi/4 - y/2) = sqrt( (1 - x)/2 )
+  // And hence:
+  //   pi/4 - y/2 = asin( sqrt( (1 - x)/2 ) )
+  // Equivalently:
+  //   asin(x) = y = pi/2 - 2 * asin( sqrt( (1 - x)/2 ) )
+  // Let u = (1 - x)/2, then:
+  //   asin(x) = pi/2 - 2 * asin( sqrt(u) )
+  // Moreover, since 0.5 < x <= 1:
+  //   0 <= u < 1/4, and 0 <= sqrt(u) < 0.5,
+  // And hence we can reuse the same polynomial approximation of asin(x) when
+  // |x| <= 0.5:
+  //   asin(x) ~ pi/2 - 2 * sqrt(u) * P(u),
+
+  // u = (1 - |x|)/2
+  double u = fputil::multiply_add(x_abs, -0.5, 0.5);
+  // v_hi + v_lo ~ sqrt(u).
+  // Let:
+  //   h = u - v_hi^2 = (sqrt(u) - v_hi) * (sqrt(u) + v_hi)
+  // Then:
+  //   sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
+  //            ~ v_hi + h / (2 * v_hi)
+  // So we can use:
+  //   v_lo = h / (2 * v_hi).
+  // Then,
+  //   asin(x) ~ pi/2 - 2*(v_hi + v_lo) * P(u)
+  double v_hi = fputil::sqrt<double>(u);
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+  double h = fputil::multiply_add(v_hi, -v_hi, u);
+#else
+  DoubleDouble v_hi_sq = fputil::exact_mult(v_hi, v_hi);
+  double h = (u - v_hi_sq.hi) - v_hi_sq.lo;
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+
+  // Scale v_lo and v_hi by 2 from the formula:
+  //   vh = v_hi * 2
+  //   vl = 2*v_lo = h / v_hi.
+  double vh = v_hi * 2.0;
+  double vl = h / v_hi;
+
+  // Polynomial approximation:
+  //   p ~ asin(sqrt(u))/sqrt(u) - ASIN_COEFFS[idx][0]
+  unsigned idx;
+  [[maybe_unused]] double err = vh * 0x1.0p-52;
+  double p = asin_eval(DoubleDouble{0.0, u}, idx, err);
+
+  // Perform computations in double-double arithmetic:
+  //   asin(x) = pi/2 - (v_hi + v_lo) * (ASIN_COEFFS[idx][0] + p)
+  DoubleDouble r0 = fputil::exact_mult(vh, ASIN_COEFFS[idx][0]);
+  DoubleDouble r = fputil::exact_add(PI_OVER_TWO.hi, -r0.hi);
+
+  // Combining all the lower terms.
+  double lo = r.lo - fputil::multiply_add(vl, ASIN_COEFFS[idx][0],
+                                          r0.lo - PI_OVER_TWO.lo);
+  double r_lo = fputil::multiply_add(vh, -p, lo);
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+#else
+  // Ziv's accuracy test.
+
+  double r_upper = r.hi + (r_lo + err);
+  double r_lower = r.hi + (r_lo - err);
+
+  if (LIBC_LIKELY(r_upper == r_lower))
+    return r_upper;
+
+  // Ziv's accuracy test failed, we redo the computations in Float128.
+
+  // After the first step of Newton-Raphson approximating v = sqrt(u), we have
+  // that:
+  //   sqrt(u) = v_hi + h / (sqrt(u) + v_hi)
+  //      v_lo = h / (2 * v_hi)
+  // With error:
+  //   sqrt(u) - (v_hi + v_lo) = h * ( 1/(sqrt(u) + v_hi) - 1/(2*v_hi) )
+  //                           = -h^2 / (2*v * (sqrt(u) + v)^2).
+  // Since:
+  //   (sqrt(u) + v_hi)^2 ~ (2sqrt(u))^2 = 4u,
+  // we can add another correction term to (v_hi + v_lo) that is:
+  //   v_ll = -h^2 / (2*v_hi * 4u)
+  //        = -v_lo * (h / 4u)
+  //        = -vl * (h / 8u),
+  // making the errors:
+  //   sqrt(u) - (v_hi + v_lo + v_ll) = O(h^3)
+  // well beyond 128-bit precision needed.
+
+  // Get the rounding error of vl = 2 * v_lo ~ h / vh
+  // Get full product of vh * vl
+#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+  double vl_lo = fputil::multiply_add(-v_hi, vl, h) / v_hi;
+#else
+  DoubleDouble vh_vl = fputil::exact_mult(v_hi, vl);
+  double vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi;
+#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
+  // vll = 2*v_ll = -vl * (h / (4u)).
+  double t = h * (-0.25) / u;
+  double vll = fputil::multiply_add(vl, t, vl_lo);
+  // m_v = -(v_hi + v_lo + v_ll).
+  Float128 m_v = fputil::quick_add(
+      Float128(vh), fputil::quick_add(Float128(vl), Float128(vll)));
+  m_v.sign = Sign::NEG;
+
+  // Perform computations in Float128:
+  //   asin(x) = pi/2 - (v_hi + v_lo + vll) * P(u).
+  Float128 y_f128(fputil::multiply_add(static_cast<double>(idx), -0x1.0p-6, u));
+
+  Float128 p_f128 = asin_eval(y_f128, idx);
+  Float128 r0_f128 = fputil::quick_mul(m_v, p_f128);
+  Float128 r_f128 = fputil::quick_add(PI_OVER_TWO_F128, r0_f128);
+
+  return static_cast<double>(r_f128);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/asin_utils.h b/libc/src/math/generic/asin_utils.h
new file mode 100644
index 0000000000000..547deb6dd5570
--- /dev/null
+++ b/libc/src/math/generic/asin_utils.h
@@ -0,0 +1,535 @@
+//===-- Collection of utils for asin/acos -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GENERIC_ASIN_UTILS_H
+#define LLVM_LIBC_SRC_MATH_GENERIC_ASIN_UTILS_H
+
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/integer_literals.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace {
+
+using DoubleDouble = fputil::DoubleDouble;
+using Float128 = fputil::DyadicFloat<128>;
+
+constexpr DoubleDouble PI_OVER_TWO = {0x1.1a62633145c07p-54,
+                                      0x1.921fb54442d18p0};
+
+// The Taylor expansion of asin(x) around 0 is:
+//   asin(x) = x + x^3/6 + 3x^5/40 + ...
+//           ~ x * P(x^2).
+// Let u = x^2, then P(x^2) = P(u), and |x| = sqrt(u).  Note that when
+// |x| <= 0.5, we have |u| <= 0.25.
+// We approximate P(u) by breaking it down by performing range reduction mod
+//   2^-6 = 1/64.
+// So for:
+//   k = round(u * 64),
+//   y = u - k/64,
+// we have that:
+//   x = sqrt(u) = sqrt(k/64 + y),
+//   |y| <= 2^-6 = 1/64,
+// and:
+//   P(u) = P(k/64 + y) = Q_k(y).
+// Hence :
+//   asin(x) = sqrt(k/64 + y) * Q_k(y),
+// Or equivalently:
+//   Q_k(y) = asin(sqrt(k/64 + y)) / sqrt(k/64 + y).
+// We generate the coefficients of Q_k by Sollya as following:
+// > procedure ASIN_APPROX(N, Deg) {
+//     abs_error = 0;
+//     rel_error = 0;
+//     for i from 1 to N/4 do {
+//       Q = fpminimax(asin(sqrt(i/N + x))/sqrt(i/N + x), Deg, [|DD, D...|],
+//                     [-1/(2*N), 1/(2*N)]);
+//       abs_err = dirtyinfnorm(asin(sqrt(i/N + x)) - sqrt(i/N + x) * Q,
+//                              [-1/(2*N), 1/(2*N)]);
+//       rel_err = dirtyinfnorm(asin(sqrt(i/N + x))/sqrt(i/N + x) - Q,
+//                              [-1/(2*N), 1/(2*N)]);
+//       if (abs_err > abs_error) then abs_error = abs_err;
+//       if (rel_err > rel_error) then rel_error = rel_err;
+//       d0 = D(coeff(Q, 0));
+//       d1 = coeff(Q, 0) - d0;
+//       write("{", d0, ", ", d1);
+//       for j from 1 to Deg do {
+//         write(", ", coeff(Q, j));
+//       };
+//       print("},");
+//     };
+//     print("Absolute Errors:", abs_error);
+//     print("Relative Errors:", rel_error);
+//  };
+// > ASIN_APPROX(64, 7);
+// ...
+// Absolute Errors: 0x1.dc9...p-67
+// Relative Errors: 0x1.da2...p-66
+//
+// For k = 0, we use the degree-14 Taylor polynomial of asin(x)/x:
+//
+// > P = 1 + x^2 * D(1/6) + x^4 * D(3/40) + x^6 * D(5/112) + x^8 * D(35/1152) +
+//       x^10 * D(63/2816) + x^12 * D(231/13312) + x^14 * D(143/10240);
+// > dirtyinfnorm(asin(x)/x - P, [-1/128, 1/128]);
+// 0x1.555...p-71
+constexpr double ASIN_COEFFS[17][9] = {
+    {1.0, 0.0, 0x1.5555555555555p-3, 0x1.3333333333333p-4, 0x1.6db6db6db6db7p-5,
+     0x1.f1c71c71c71c7p-6, 0x1.6e8ba2e8ba2e9p-6, 0x1.1c4ec4ec4ec4fp-6,
+     0x1.c99999999999ap-7},
+    {0x1.00abe0c129e1ep0, 0x1.7cdde330a0e08p-57, 0x1.5a3385d5c7ba5p-3,
+     0x1.3bf51056f666bp-4, 0x1.7dba76b165c55p-5, 0x1.07be4afb45142p-5,
+     0x1.8a6a242c27adbp-6, 0x1.36b49e664eb74p-6, 0x1.f1ac022c7a725p-7},
+    {0x1.015a397cf0f1cp0, -0x1.eec12f4a0e23ap-55, 0x1.5f3581be7b08bp-3,
+     0x1.4519ddf1ae56cp-4, 0x1.8eb4b6ee90a1cp-5, 0x1.17bc8538a6a91p-5,
+     0x1.a8e3b76a81de9p-6, 0x1.540067751f362p-6, 0x1.16aa1460b24d5p-6},
+    {0x1.020b1c7df0575p0, -0x1.dd58bfb09878p-55, 0x1.645ce0ab901bap-3,
+     0x1.4ea79c34fc7e8p-4, 0x1.a0b8ac0945946p-5, 0x1.28f9bab33ecacp-5,
+     0x1.ca42e489eb27ep-6, 0x1.7498cf62245cep-6, 0x1.3ecec5d85730ap-6},
+    {0x1.02be9ce0b87cdp0, 0x1.e5c6ec8c73cdcp-56, 0x1.69ab5325bc359p-3,
+     0x1.58a4c3097aaffp-4, 0x1.b3db3606681b5p-5, 0x1.3b94820c270b7p-5,
+     0x1.eedca27fefeebp-6, 0x1.98ed0a672ba3dp-6, 0x1.5a7ba92d7c7c1p-6},
+    {0x1.0374cea0c0c9fp0, -0x1.917ebfad78ac3p-54, 0x1.6f22a497b2ecp-3,
+     0x1.63184d8a79e0bp-4, 0x1.c83339cb8a93bp-5, 0x1.4faef324635b9p-5,
+     0x1.0b87cb9dda2aap-5, 0x1.c17d18cbaf4dcp-6, 0x1.84fd3f338eb99p-6},
+    {0x1.042dc6a65ffbfp0, -0x1.c7f0715ac0a44p-55, 0x1.74c4bd7412f9dp-3,
+     0x1.6e09c6d2b731ep-4, 0x1.ddd9dcdaf4745p-5, 0x1.656f1f5455d0cp-5,
+     0x1.21a427af0979bp-5, 0x1.eedcba062ae41p-6, 0x1.b87984ee94704p-6},
+    {0x1.04e99ad5e4bcdp0, -0x1.e97e02ea4515ep-54, 0x1.7a93a5917200bp-3,
+     0x1.7981584731c74p-4, 0x1.f4eac9278d167p-5, 0x1.7cff9c2ab9587p-5,
+     0x1.3a011aba1743dp-5, 0x1.10db6a3cd6ec6p-5, 0x1.f07578c65197dp-6},
+    {0x1.05a8621feb16bp0, -0x1.e5c3a30dcee94p-56, 0x1.809186c2e57ddp-3,
+     0x1.8587d99442e48p-4, 0x1.06c23d1e73eacp-4, 0x1.969023f0a9dc4p-5,
+     0x1.54e4fab6ced65p-5, 0x1.2d68d296bb8fap-5, 0x1.147275ba8ee51p-5},
+    {0x1.066a34930ec8dp0, -0x1.4813f47576a3bp-54, 0x1.86c0afb447a74p-3,
+     0x1.9226e29948e2ep-4, 0x...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/134401


More information about the libc-commits mailing list