[libc-commits] [libc] [libc][math] Add small code size options for atan2f. (PR #118532)
via libc-commits
libc-commits at lists.llvm.org
Tue Dec 3 11:16:12 PST 2024
https://github.com/lntue created https://github.com/llvm/llvm-project/pull/118532
None
>From 49d44084418255ce78ac0aa6f890f9cc436a920d Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue.h at gmail.com>
Date: Tue, 3 Dec 2024 19:14:23 +0000
Subject: [PATCH] [libc][math] Add small code size options for atan2f.
---
libc/src/__support/macros/optimization.h | 4 +-
libc/src/math/generic/atan2f.cpp | 21 ++++++--
libc/src/math/generic/inv_trigf_utils.h | 66 ++++++++++++++++++++----
3 files changed, 76 insertions(+), 15 deletions(-)
diff --git a/libc/src/__support/macros/optimization.h b/libc/src/__support/macros/optimization.h
index 41ecd2bd6d7191..a2634950d431bc 100644
--- a/libc/src/__support/macros/optimization.h
+++ b/libc/src/__support/macros/optimization.h
@@ -48,7 +48,7 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
#ifndef LIBC_MATH
#define LIBC_MATH 0
-#else
+#endif // LIBC_MATH
#if (LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS)
#define LIBC_MATH_HAS_SKIP_ACCURATE_PASS
@@ -58,6 +58,4 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
#define LIBC_MATH_HAS_SMALL_TABLES
#endif
-#endif // LIBC_MATH
-
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H
diff --git a/libc/src/math/generic/atan2f.cpp b/libc/src/math/generic/atan2f.cpp
index a2e5499809a340..db7639396cdd7a 100644
--- a/libc/src/math/generic/atan2f.cpp
+++ b/libc/src/math/generic/atan2f.cpp
@@ -21,6 +21,8 @@ namespace LIBC_NAMESPACE_DECL {
namespace {
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
// Look up tables for accurate pass:
// atan(i/16) with i = 0..16, generated by Sollya with:
@@ -163,6 +165,8 @@ float atan2f_double_double(double num_d, double den_d, double q_d, int idx,
return static_cast<float>(cpp::bit_cast<double>(rr_bits));
}
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
} // anonymous namespace
// There are several range reduction steps we can take for atan2(y, x) as
@@ -283,14 +287,24 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {
fputil::DoubleDouble const_term = CONST_ADJ[x_sign][y_sign][recip];
double q_d = num_d / den_d;
- double k_d = fputil::nearest_integer(q_d * 0x1.0p4f);
+ double k_d = fputil::nearest_integer(q_d * 0x1.0p4);
int idx = static_cast<int>(k_d);
+ double r;
+
+#ifdef LIBC_MATH_HAS_SMALL_TABLES
+ double p = atan_eval_no_table(num_d, den_d, k_d * 0x1.0p-4);
+ r = final_sign * (p + (const_term.hi + ATAN_K_OVER_16[idx]));
+#else
q_d = fputil::multiply_add(k_d, -0x1.0p-4, q_d);
double p = atan_eval(q_d, idx);
- double r = final_sign *
- fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]);
+ r = final_sign *
+ fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]);
+#endif // LIBC_MATH_HAS_SMALL_TABLES
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ return static_cast<float>(r);
+#else
constexpr uint32_t LOWER_ERR = 4;
// Mask sticky bits in double precision before rounding to single precision.
constexpr uint32_t MASK =
@@ -306,6 +320,7 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {
return atan2f_double_double(num_d, den_d, q_d, idx, k_d, final_sign,
const_term);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/inv_trigf_utils.h b/libc/src/math/generic/inv_trigf_utils.h
index bdabec9c1b6bb9..8b47aba342995b 100644
--- a/libc/src/math/generic/inv_trigf_utils.h
+++ b/libc/src/math/generic/inv_trigf_utils.h
@@ -17,14 +17,35 @@
namespace LIBC_NAMESPACE_DECL {
// PI and PI / 2
-constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
-constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
+static constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
+static constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
extern double ATAN_COEFFS[17][9];
+// Look-up table for atan(k/16) with k = 0..16.
+static constexpr double ATAN_K_OVER_16[17] = {
+ 0.0,
+ 0x1.ff55bb72cfdeap-5,
+ 0x1.fd5ba9aac2f6ep-4,
+ 0x1.7b97b4bce5b02p-3,
+ 0x1.f5b75f92c80ddp-3,
+ 0x1.362773707ebccp-2,
+ 0x1.6f61941e4def1p-2,
+ 0x1.a64eec3cc23fdp-2,
+ 0x1.dac670561bb4fp-2,
+ 0x1.0657e94db30dp-1,
+ 0x1.1e00babdefeb4p-1,
+ 0x1.345f01cce37bbp-1,
+ 0x1.4978fa3269ee1p-1,
+ 0x1.5d58987169b18p-1,
+ 0x1.700a7c5784634p-1,
+ 0x1.819d0b7158a4dp-1,
+ 0x1.921fb54442d18p-1,
+};
+
// For |x| <= 1/32 and 0 <= i <= 16, return Q(x) such that:
// Q(x) ~ (atan(x + i/16) - atan(i/16)) / x.
-LIBC_INLINE double atan_eval(double x, int i) {
+LIBC_INLINE static double atan_eval(double x, unsigned i) {
double x2 = x * x;
double c0 = fputil::multiply_add(x, ATAN_COEFFS[i][2], ATAN_COEFFS[i][1]);
@@ -39,16 +60,43 @@ LIBC_INLINE double atan_eval(double x, int i) {
return p;
}
+// Evaluate atan without big lookup table.
+// atan(n/d) - atan(k/16) = atan((n/d - k/16) / (1 + (n/d) * (k/16)))
+// = atan((n - d * k/16)) / (d + n * k/16))
+// So we let q = (n - d * k/16) / (d + n * k/16),
+// and approximate with Taylor polynomial:
+// atan(q) ~ q - q^3/3 + q^5/5 - q^7/7 + q^9/9
+LIBC_INLINE static double atan_eval_no_table(double num, double den,
+ double k_over_16) {
+ double num_r = fputil::multiply_add(den, -k_over_16, num);
+ double den_r = fputil::multiply_add(num, k_over_16, den);
+ double q = num_r / den_r;
+
+ constexpr double ATAN_TAYLOR[] = {
+ -0x1.5555555555555p-2,
+ 0x1.999999999999ap-3,
+ -0x1.2492492492492p-3,
+ 0x1.c71c71c71c71cp-4,
+ };
+ double q2 = q * q;
+ double q3 = q2 * q;
+ double q4 = q2 * q2;
+ double c0 = fputil::multiply_add(q2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]);
+ double c1 = fputil::multiply_add(q2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]);
+ double d = fputil::multiply_add(q4, c1, c0);
+ return fputil::multiply_add(q3, d, q);
+}
+
// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
// [|1, D...|], [0, 0.5]);
-constexpr double ASIN_COEFFS[10] = {0x1.5555555540fa1p-3, 0x1.333333512edc2p-4,
- 0x1.6db6cc1541b31p-5, 0x1.f1caff324770ep-6,
- 0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6,
- 0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6,
- -0x1.df946fa875ddp-8, 0x1.02311ecf99c28p-5};
+static constexpr double ASIN_COEFFS[10] = {
+ 0x1.5555555540fa1p-3, 0x1.333333512edc2p-4, 0x1.6db6cc1541b31p-5,
+ 0x1.f1caff324770ep-6, 0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6,
+ 0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6, -0x1.df946fa875ddp-8,
+ 0x1.02311ecf99c28p-5};
// Evaluate P(x^2) - 1, where P(x^2) ~ asin(x)/x
-LIBC_INLINE double asin_eval(double xsq) {
+LIBC_INLINE static double asin_eval(double xsq) {
double x4 = xsq * xsq;
double r1 = fputil::polyeval(x4, ASIN_COEFFS[0], ASIN_COEFFS[2],
ASIN_COEFFS[4], ASIN_COEFFS[6], ASIN_COEFFS[8]);
More information about the libc-commits
mailing list