[libc-commits] [libc] [libc][math] Optimize `asinpif` and `acospif` using estrin's scheme (PR #184286)
Mohamed Emad via libc-commits
libc-commits at lists.llvm.org
Wed Mar 4 13:06:53 PST 2026
https://github.com/hulxv updated https://github.com/llvm/llvm-project/pull/184286
>From ae5b32da59b6fa4843356bbb762f90b70e7b9d65 Mon Sep 17 00:00:00 2001
From: hulxv <hulxxv at gmail.com>
Date: Tue, 3 Mar 2026 05:08:20 +0200
Subject: [PATCH] [libc][math] optimize `asinpif` and `acospif`
---
libc/src/__support/math/asinpif.h | 22 -----------
libc/src/__support/math/inv_trigf_utils.h | 48 +++++++++++++++++++----
2 files changed, 40 insertions(+), 30 deletions(-)
diff --git a/libc/src/__support/math/asinpif.h b/libc/src/__support/math/asinpif.h
index 9a5daf6198a42..79d3ebbe63b5b 100644
--- a/libc/src/__support/math/asinpif.h
+++ b/libc/src/__support/math/asinpif.h
@@ -23,22 +23,6 @@ namespace LIBC_NAMESPACE_DECL {
namespace math {
LIBC_INLINE float asinpif(float x) {
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- constexpr size_t N_EXCEPTS = 5;
- constexpr fputil::ExceptValues<float, N_EXCEPTS> ASINPIF_EXCEPTS = {
- {// (inputs, RZ output, RU offset, RD offset, RN offset)
- // x = 0x1.e768f6p-122, asinpif(x) = 0x1.364b7ap-123 (RZ)
- {0x02F3B47B, 0x021B25BD, 1, 0, 0},
- // x = 0x1.e768f6p-24, asinpif(x) = 0x1.364b7ap-25 (RZ)
- {0x33F3B47B, 0x331B25BD, 1, 0, 1},
- // x = 0x1.dddb4ep-19, asinpif(x) = 0x1.303686p-20 (RZ)
- {0x366EEDA7, 0x35981B43, 1, 0, 1},
- // x = -0x1.dddb4ep-19, asinpif(x) = -0x1.303686p-20 (RZ)
- {0xB66EEDA7, 0xB5981B43, 0, 1, 1},
- // x = -0x1.e768f6p-24, asinpif(x) = -0x1.364b7ap-25 (RZ)
- {0xB3F3B47B, 0xB31B25BD, 0, 1, 1}}};
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
using FPBits = fputil::FPBits<float>;
FPBits xbits(x);
@@ -61,12 +45,6 @@ LIBC_INLINE float asinpif(float x) {
return FPBits::quiet_nan().get_val();
}
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- auto r = ASINPIF_EXCEPTS.lookup(xbits.uintval());
- if (LIBC_UNLIKELY(r.has_value()))
- return r.value();
-#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
// if |x| <= 0.5:
// asinpi(x) = x * (c0 + x^2 * P1(x^2))
if (LIBC_UNLIKELY(x_abs <= 0.5)) {
diff --git a/libc/src/__support/math/inv_trigf_utils.h b/libc/src/__support/math/inv_trigf_utils.h
index 7a93831333db0..a26fa86019444 100644
--- a/libc/src/__support/math/inv_trigf_utils.h
+++ b/libc/src/__support/math/inv_trigf_utils.h
@@ -184,14 +184,14 @@ LIBC_INLINE double asin_eval(double xsq) {
// > prec = 200;
// > display = hexadecimal;
// > g = asin(x) / (pi * x);
-// > P = fpminimax(g, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
+// > P = fpminimax(g, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22|],
// > [|D...|], [0, 0.5]);
// > for i from 0 to degree(P) do coeff(P, i);
// > print("Error:", dirtyinfnorm(P - g, [1e-30; 0.25]));
-// Error: 0x1.45c281e1cf9b58p-50 ~= 2^−49.652
+// Error : 0x1.a53f84eafa3ea69bb81b6c52b3278872083fca2c757bd778acp-54 ~= 2^-54
//
// Non-zero coefficients (even powers only):
-LIBC_INLINE_VAR constexpr double ASINPI_COEFFS[13] = {
+LIBC_INLINE_VAR constexpr double ASINPI_COEFFS[12] = {
0x1.45f306dc9c881p-2, // x^0
0x1.b2995e7b7e756p-5, // x^2
0x1.8723a1d12f828p-6, // x^4
@@ -206,12 +206,44 @@ LIBC_INLINE_VAR constexpr double ASINPI_COEFFS[13] = {
0x1.4b50c2eb13708p-7 // x^22
};
-// Evaluates P1(v2) = c1 + c2*v2 + c3*v2^2 + ... (tail of P without c0)
+// Evaluates P1(v2) = c1 + c2*v2 + c3*v2^2 + ... + c12*v2^11 (tail of P
+// without c0) using Estrin's scheme for instruction-level parallelism.
+//
+// The tail polynomial has 12 coefficients ASINPI_COEFFS[1..11] in powers of
+// v2:
+// P1(v2) = c1 + c2*v2 + c3*v2^2 + c4*v2^3 + ... + c11*v2^10
+//
+// Estrin pairs them bottom-up:
+// Level 0 (6 pairs, using v2):
+// p0 = c1 + c2*v2 p1 = c3 + c4*v2
+// p2 = c5 + c6*v2 p3 = c7 + c8*v2
+// p4 = c9 + c10*v2 p5 = c11
+// Level 1 (3 pairs, using v4):
+// q0 = p0 + p1*v4 q1 = p2 + p3*v4
+// q2 = p4 + p5*v4
+// Level 2 (using v8):
+// r0 = q0 + q1*v8 r1 = q2
+// Level 3 (using v16):
+// result = r0 + r1*v16
LIBC_INLINE double asinpi_eval(double v2) {
- return fputil::polyeval(
- v2, ASINPI_COEFFS[1], ASINPI_COEFFS[2], ASINPI_COEFFS[3],
- ASINPI_COEFFS[4], ASINPI_COEFFS[5], ASINPI_COEFFS[6], ASINPI_COEFFS[7],
- ASINPI_COEFFS[8], ASINPI_COEFFS[9], ASINPI_COEFFS[10], ASINPI_COEFFS[11]);
+ double v4 = v2 * v2;
+ double v8 = v4 * v4;
+ double v16 = v8 * v8;
+
+ double p0 = fputil::multiply_add(v2, ASINPI_COEFFS[2], ASINPI_COEFFS[1]);
+ double p1 = fputil::multiply_add(v2, ASINPI_COEFFS[4], ASINPI_COEFFS[3]);
+ double p2 = fputil::multiply_add(v2, ASINPI_COEFFS[6], ASINPI_COEFFS[5]);
+ double p3 = fputil::multiply_add(v2, ASINPI_COEFFS[8], ASINPI_COEFFS[7]);
+ double p4 = fputil::multiply_add(v2, ASINPI_COEFFS[10], ASINPI_COEFFS[9]);
+ double p5 = ASINPI_COEFFS[11];
+
+ double q0 = fputil::multiply_add(v4, p1, p0);
+ double q1 = fputil::multiply_add(v4, p3, p2);
+ double q2 = fputil::multiply_add(v4, p5, p4);
+
+ double r0 = fputil::multiply_add(v8, q1, q0);
+
+ return fputil::multiply_add(v16, q2, r0);
}
} // namespace inv_trigf_utils_internal
More information about the libc-commits
mailing list