[libc-commits] [libc] a629621 - [libc][math] Improve atanf performance. (#85463)
via libc-commits
libc-commits at lists.llvm.org
Mon Mar 18 18:10:55 PDT 2024
Author: lntue
Date: 2024-03-18T21:10:50-04:00
New Revision: a629621454838207ca49bcfd773d5aa0671659f1
URL: https://github.com/llvm/llvm-project/commit/a629621454838207ca49bcfd773d5aa0671659f1
DIFF: https://github.com/llvm/llvm-project/commit/a629621454838207ca49bcfd773d5aa0671659f1.diff
LOG: [libc][math] Improve atanf performance. (#85463)
Simplify the range reduction logic and computations. Performance test
using `perf.sh` from CORE-MATH project on Ryzen 5900X:
Before:
```
$ ./perf.sh atanf --rdtsc --path1
LIBC-location: /home/lnt/experiment/llvm-project/build/projects/libc/lib/libllvmlibc.a
GNU libc version: 2.35
GNU libc release: stable
-- CORE-MATH reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 14.369 + 0.556 clc/call; Median-Min = 0.613 clc/call; Max = 15.061 clc/call;
-- System LIBC reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 26.180 + 0.015 clc/call; Median-Min = 0.014 clc/call; Max = 26.260 clc/call;
-- LIBC reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 21.237 + 0.022 clc/call; Median-Min = 0.020 clc/call; Max = 21.272 clc/call;
$ ./perf.sh atanf --rdtsc --path1 --latency
LIBC-location: /home/lnt/experiment/llvm-project/build/projects/libc/lib/libllvmlibc.a
GNU libc version: 2.35
GNU libc release: stable
-- CORE-MATH latency --
[####################] 100 %
Ntrial = 20 ; Min = 50.505 + 0.045 clc/call; Median-Min = 0.037 clc/call; Max = 50.579 clc/call;
-- System LIBC latency --
[####################] 100 %
Ntrial = 20 ; Min = 62.438 + 0.836 clc/call; Median-Min = 0.049 clc/call; Max = 64.498 clc/call;
-- LIBC latency --
[####################] 100 %
Ntrial = 20 ; Min = 67.781 + 0.546 clc/call; Median-Min = 0.028 clc/call; Max = 68.844 clc/call;
```
After:
```
$ ./perf.sh atanf --rdtsc --path2
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
GNU libc version: 2.35
GNU libc release: stable
-- CORE-MATH reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 14.400 + 0.353 clc/call; Median-Min = 0.404 clc/call; Max = 14.863 clc/call;
-- System LIBC reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 25.247 + 0.783 clc/call; Median-Min = 0.714 clc/call; Max = 26.238 clc/call;
-- LIBC reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 13.751 + 0.158 clc/call; Median-Min = 0.140 clc/call; Max = 14.006 clc/call;
$ ./perf.sh atanf --rdtsc --path2 --latency
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
GNU libc version: 2.35
GNU libc release: stable
-- CORE-MATH latency --
[####################] 100 %
Ntrial = 20 ; Min = 51.837 + 0.073 clc/call; Median-Min = 0.058 clc/call; Max = 52.000 clc/call;
-- System LIBC latency --
[####################] 100 %
Ntrial = 20 ; Min = 62.487 + 1.878 clc/call; Median-Min = 1.965 clc/call; Max = 64.569 clc/call;
OK
-- LIBC latency --
[####################] 100 %
Ntrial = 20 ; Min = 55.414 + 1.312 clc/call; Median-Min = 0.345 clc/call; Max = 58.362 clc/call;
```
Added:
Modified:
libc/src/math/generic/CMakeLists.txt
libc/src/math/generic/atanf.cpp
libc/src/math/generic/inv_trigf_utils.cpp
libc/src/math/generic/inv_trigf_utils.h
libc/test/src/math/CMakeLists.txt
libc/test/src/math/atanf_test.cpp
libc/test/src/math/exhaustive/atanf_test.cpp
utils/bazel/llvm-project-overlay/libc/BUILD.bazel
Removed:
libc/test/src/math/inv_trigf_utils_test.cpp
################################################################################
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index b0a35c652cd587..176c32e47768df 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -2155,16 +2155,9 @@ add_object_library(
SRCS
inv_trigf_utils.cpp
DEPENDS
- .math_utils
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.fenv_impl
- libc.src.__support.FPUtil.nearest_integer
- libc.src.__support.FPUtil.nearest_integer_operations
+ libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.polyeval
libc.src.__support.common
- libc.include.errno
- libc.src.errno.errno
- libc.include.math
)
add_entrypoint_object(
@@ -2211,8 +2204,11 @@ add_entrypoint_object(
../atanf.h
DEPENDS
.inv_trigf_utils
- .math_utils
+ libc.src.__support.FPUtil.except_value_utils
libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.nearest_integer
+ libc.src.__support.FPUtil.polyeval
libc.src.__support.FPUtil.rounding_mode
libc.src.__support.macros.optimization
COMPILE_OPTIONS
diff --git a/libc/src/math/generic/atanf.cpp b/libc/src/math/generic/atanf.cpp
index e0f8a1bfc2ecc9..69fd45ddd767e5 100644
--- a/libc/src/math/generic/atanf.cpp
+++ b/libc/src/math/generic/atanf.cpp
@@ -7,11 +7,14 @@
//===----------------------------------------------------------------------===//
#include "src/math/atanf.h"
-#include "math_utils.h"
+#include "inv_trigf_utils.h"
#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/math/generic/inv_trigf_utils.h"
namespace LIBC_NAMESPACE {
@@ -19,48 +22,100 @@ LLVM_LIBC_FUNCTION(float, atanf, (float x)) {
using FPBits = typename fputil::FPBits<float>;
using Sign = fputil::Sign;
- // x == 0.0
- if (LIBC_UNLIKELY(x == 0.0f))
- return x;
+ constexpr double FINAL_SIGN[2] = {1.0, -1.0};
+ constexpr double SIGNED_PI_OVER_2[2] = {0x1.921fb54442d18p0,
+ -0x1.921fb54442d18p0};
- FPBits xbits(x);
- Sign sign = xbits.sign();
- xbits.set_sign(Sign::POS);
+ FPBits x_bits(x);
+ Sign sign = x_bits.sign();
+ x_bits.set_sign(Sign::POS);
+ uint32_t x_abs = x_bits.uintval();
- if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) {
- if (xbits.is_inf())
- return static_cast<float>(
- opt_barrier(sign.is_neg() ? -M_MATH_PI_2 : M_MATH_PI_2));
- else
+ // x is inf or nan, |x| < 2^-4 or |x|= > 16.
+ if (LIBC_UNLIKELY(x_abs <= 0x3d80'0000U || x_abs >= 0x4180'0000U)) {
+ double x_d = static_cast<double>(x);
+ double const_term = 0.0;
+ if (LIBC_UNLIKELY(x_abs >= 0x4180'0000)) {
+ // atan(+-Inf) = +-pi/2.
+ if (x_bits.is_inf())
+ return static_cast<float>(SIGNED_PI_OVER_2[sign.is_neg()]);
+ if (x_bits.is_nan())
+ return x;
+ // x >= 16
+ x_d = -1.0 / x_d;
+ const_term = SIGNED_PI_OVER_2[sign.is_neg()];
+ }
+ // 0 <= x < 1/16;
+ if (LIBC_UNLIKELY(x_bits.is_zero()))
return x;
- }
- // |x| == 0.06905200332403183
- if (LIBC_UNLIKELY(xbits.uintval() == 0x3d8d6b23U)) {
- if (fputil::fenv_is_round_to_nearest()) {
- // 0.06894256919622421
- FPBits br(0x3d8d31c3U);
- br.set_sign(sign);
- return br.get_val();
+ // x <= 2^-12;
+ if (LIBC_UNLIKELY(x_abs < 0x3980'0000)) {
+#if defined(LIBC_TARGET_CPU_HAS_FMA)
+ return fputil::multiply_add(x, -0x1.0p-25f, x);
+#else
+ double x_d = static_cast<double>(x);
+ return static_cast<float>(fputil::multiply_add(x_d, -0x1.0p-25, x_d));
+#endif // LIBC_TARGET_CPU_HAS_FMA
}
+ // Use Taylor polynomial:
+ // atan(x) ~ x * (1 - x^2 / 3 + x^4 / 5 - x^6 / 7 + x^8 / 9 - x^10 / 11).
+ double x2 = x_d * x_d;
+ double x4 = x2 * x2;
+ double c0 = fputil::multiply_add(x2, ATAN_COEFFS[0][1], ATAN_COEFFS[0][0]);
+ double c1 = fputil::multiply_add(x2, ATAN_COEFFS[0][3], ATAN_COEFFS[0][2]);
+ double c2 = fputil::multiply_add(x2, ATAN_COEFFS[0][5], ATAN_COEFFS[0][4]);
+ double p = fputil::polyeval(x4, c0, c1, c2);
+ double r = fputil::multiply_add(x_d, p, const_term);
+ return static_cast<float>(r);
}
- // |x| == 1.8670953512191772
- if (LIBC_UNLIKELY(xbits.uintval() == 0x3feefcfbU)) {
- int rounding_mode = fputil::quick_get_round();
- if (sign.is_neg()) {
- if (rounding_mode == FE_DOWNWARD) {
- // -1.0790828466415405
- return FPBits(0xbf8a1f63U).get_val();
- }
- } else {
- if (rounding_mode == FE_UPWARD) {
- // 1.0790828466415405
- return FPBits(0x3f8a1f63U).get_val();
- }
+ // Range reduction steps:
+ // 1) atan(x) = sign(x) * atan(|x|)
+ // 2) If |x| > 1, atan(|x|) = pi/2 - atan(1/|x|)
+ // 3) For 1/16 < x <= 1, we find k such that: |x - k/16| <= 1/32.
+ // 4) Then we use polynomial approximation:
+ // atan(x) ~ atan((k/16) + (x - (k/16)) * Q(x - k/16)
+ // = P(x - k/16)
+ double x_d, const_term, final_sign;
+ int idx;
+
+ if (x_abs > 0x3f80'0000U) {
+ // Exceptional value:
+ if (LIBC_UNLIKELY(x_abs == 0x3ffe'2ec1U)) { // |x| = 0x1.fc5d82p+0
+ return sign.is_pos() ? fputil::round_result_slightly_up(0x1.1ab2fp0f)
+ : fputil::round_result_slightly_down(-0x1.1ab2fp0f);
}
+ // |x| > 1, we need to invert x, so we will perform range reduction in
+ // double precision.
+ x_d = 1.0 / static_cast<double>(x_bits.get_val());
+ double k_d = fputil::nearest_integer(x_d * 0x1.0p4);
+ x_d = fputil::multiply_add(k_d, -0x1.0p-4, x_d);
+ idx = static_cast<int>(k_d);
+ final_sign = FINAL_SIGN[sign.is_pos()];
+ // Adjust constant term of the polynomial by +- pi/2.
+ const_term = fputil::multiply_add(final_sign, ATAN_COEFFS[idx][0],
+ SIGNED_PI_OVER_2[sign.is_neg()]);
+ } else {
+ // Exceptional value:
+ if (LIBC_UNLIKELY(x_abs == 0x3dbb'6ac7U)) { // |x| = 0x1.76d58ep-4
+ return sign.is_pos()
+ ? fputil::round_result_slightly_up(0x1.75cb06p-4f)
+ : fputil::round_result_slightly_down(-0x1.75cb06p-4f);
+ }
+ // Perform range reduction in single precision.
+ float x_f = x_bits.get_val();
+ float k_f = fputil::nearest_integer(x_f * 0x1.0p4f);
+ x_f = fputil::multiply_add(k_f, -0x1.0p-4f, x_f);
+ x_d = static_cast<double>(x_f);
+ idx = static_cast<int>(k_f);
+ final_sign = FINAL_SIGN[sign.is_neg()];
+ const_term = final_sign * ATAN_COEFFS[idx][0];
}
- return static_cast<float>(atan_eval(x));
+ double p = atan_eval(x_d, idx);
+ double r = fputil::multiply_add(final_sign * x_d, p, const_term);
+
+ return static_cast<float>(r);
}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/inv_trigf_utils.cpp b/libc/src/math/generic/inv_trigf_utils.cpp
index 8013c0470affb1..93d5bcbf7b567d 100644
--- a/libc/src/math/generic/inv_trigf_utils.cpp
+++ b/libc/src/math/generic/inv_trigf_utils.cpp
@@ -10,19 +10,71 @@
namespace LIBC_NAMESPACE {
-// N[Table[ArcTan[x], {x, 1/16, 16/16, 1/16}], 40]
-alignas(64) const double ATAN_T[ATAN_T_SIZE] = {
- 0x1.ff55bb72cfdeap-5, 0x1.fd5ba9aac2f6ep-4, 0x1.7b97b4bce5b02p-3,
- 0x1.f5b75f92c80ddp-3, 0x1.362773707ebccp-2, 0x1.6f61941e4def1p-2,
- 0x1.a64eec3cc23fdp-2, 0x1.dac670561bb4fp-2, 0x1.0657e94db30d0p-1,
- 0x1.1e00babdefeb4p-1, 0x1.345f01cce37bbp-1, 0x1.4978fa3269ee1p-1,
- 0x1.5d58987169b18p-1, 0x1.700a7c5784634p-1, 0x1.819d0b7158a4dp-1,
- 0x1.921fb54442d18p-1};
-
-// for(int i = 0; i < 5; i++)
-// printf("%.13a,\n", (-2 * (i % 2) + 1) * 1.0 / (2 * i + 1));
-alignas(64) const double ATAN_K[5] = {
- 0x1.0000000000000p+0, -0x1.5555555555555p-2, 0x1.999999999999ap-3,
- -0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4};
+// Polynomial approximation for 0 <= x <= 1:
+// atan(x) ~ atan((i/16) + (x - (i/16)) * Q(x - i/16)
+// = P(x - i/16)
+// Generated by Sollya with:
+// > for i from 1 to 16 do {
+// mid_point = i/16;
+// P = fpminimax(atan(mid_point + x), 7, [|D...|], [-1/32, 1/32]);
+// print("{", coeff(P, 0), ",", coeff(P, 1), ",", coeff(P, 2), ",",
+// coeff(P, 3), ",", coeff(P, 4), ",", coeff(P, 5), ",", coeff(P, 6),
+// ",", coeff(P, 7), "},");
+// };
+// For i = 0, ATAN_COEFFS[0][j] = (-1)^j * (1/(2*j + 1)) is the odd coefficients
+// of the Taylor polynomial of atan(x).
+double ATAN_COEFFS[17][8] = {
+ {0x1.0000000000000p+0, -0x1.5555555555555p-2, 0x1.999999999999ap-3,
+ -0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4, -0x1.745d1745d1746p-4,
+ 0x1.3b13b13b13b14p-4, -0x1.1111111111111p-4},
+ {0x1.ff55bb72cfdb1p-5, 0x1.fe01fe01fe1bp-1, -0x1.fc05f80821d1ap-5,
+ -0x1.4d6930419fc5fp-2, 0x1.f61b9f6d69313p-5, 0x1.8208a32f4346cp-3,
+ -0x1.ecb8fc53d04efp-5, -0x1.060710cb59cbcp-3},
+ {0x1.fd5ba9aac2f3cp-4, 0x1.f81f81f81f96ap-1, -0x1.f05e09cf4c1b2p-4,
+ -0x1.368c3aac7543ep-2, 0x1.d9b14bddfac55p-4, 0x1.4048e55ec725ep-3,
+ -0x1.b98ca3c1594b5p-4, -0x1.664eabaabbc16p-4},
+ {0x1.7b97b4bce5ae7p-3, 0x1.ee9c7f8458f06p-1, -0x1.665c226c8dc69p-3,
+ -0x1.1344bb77961b7p-2, 0x1.42ac97745d3ccp-3, 0x1.c32e142047ec1p-4,
+ -0x1.137ae41ab96cbp-3, -0x1.1a6ae8c09a4b6p-5},
+ {0x1.f5b75f92c80c6p-3, 0x1.e1e1e1e1e1ed4p-1, -0x1.c5894d101ad4p-3,
+ -0x1.ce6de02b38c38p-3, 0x1.78a3920c336b9p-3, 0x1.dd5ff94a9d499p-5,
+ -0x1.1ac2d3f9d072ep-3, 0x1.0af9735dff373p-6},
+ {0x1.362773707ebc5p-2, 0x1.d272ca3fc5b8bp-1, -0x1.0997e8ae90cb6p-2,
+ -0x1.6cf6667146798p-3, 0x1.8dd1dff17f3d3p-3, 0x1.24860eced656fp-7,
+ -0x1.f4220e8f18ed5p-4, 0x1.b700aed7cdc34p-5},
+ {0x1.6f61941e4deeep-2, 0x1.c0e070381c115p-1, -0x1.2726dd1347c7ep-2,
+ -0x1.09f37b3ad010dp-3, 0x1.85eaca5196f5cp-3, -0x1.04d640117852ap-5,
+ -0x1.802c2956871c7p-4, 0x1.2992b45df0ee7p-4},
+ {0x1.a64eec3cc23fep-2, 0x1.adbe87f94906bp-1, -0x1.3b9d8eab5eae5p-2,
+ -0x1.57c09646faabbp-4, 0x1.6795330e73aep-3, -0x1.f2d89a702a652p-5,
+ -0x1.f3afd90a9d4d7p-5, 0x1.3261723d3f153p-4},
+ {0x1.dac670561bb53p-2, 0x1.999999999998fp-1, -0x1.47ae147afd8cap-2,
+ -0x1.5d867c3dfd72ap-5, 0x1.3a92a76cba833p-3, -0x1.3ec460286928ap-4,
+ -0x1.ed02ff86892acp-6, 0x1.0a674c8f05727p-4},
+ {0x1.0657e94db30d2p-1, 0x1.84f00c27805ffp-1, -0x1.4c62cb564f677p-2,
+ -0x1.e6495b262dfe7p-8, 0x1.063c34eca262bp-3, -0x1.58b78dc79b5aep-4,
+ -0x1.4623815233be1p-8, 0x1.93afe94328089p-5},
+ {0x1.1e00babdefeb6p-1, 0x1.702e05c0b8159p-1, -0x1.4af2b78236bd6p-2,
+ 0x1.5d0b7ea46ed08p-6, 0x1.a124870236935p-4, -0x1.519e1ec133a88p-4,
+ 0x1.a54632a3f48c7p-7, 0x1.099ca0945096dp-5},
+ {0x1.345f01cce37bdp-1, 0x1.5babcc647fa7ep-1, -0x1.449db09443a67p-2,
+ 0x1.655caac78a0fcp-5, 0x1.3bbbdb0d09efap-4, -0x1.34a306c27e021p-4,
+ 0x1.83fe749c7966p-6, 0x1.2057cc96d9edcp-6},
+ {0x1.4978fa3269ee2p-1, 0x1.47ae147ae146bp-1, -0x1.3a92a305652e1p-2,
+ 0x1.ec21b5172657fp-5, 0x1.c2f8c45d2f4eep-5, -0x1.0ba99c4aeb8acp-4,
+ 0x1.d716a4af4d1d6p-6, 0x1.97fba0a9696dep-8},
+ {0x1.5d58987169b19p-1, 0x1.34679ace0133cp-1, -0x1.2ddfb03920e2fp-2,
+ 0x1.2491307c0fa0bp-4, 0x1.29c7eca0136fp-5, -0x1.bca792caa6f1cp-5,
+ 0x1.e5d92545576bcp-6, -0x1.8ca76fcf5ccd2p-10},
+ {0x1.700a7c5784634p-1, 0x1.21fb78121fb71p-1, -0x1.1f6a8499ea541p-2,
+ 0x1.41b15e5e77bcfp-4, 0x1.59bc9bf54fb02p-6, -0x1.63b54ff058e0fp-5,
+ 0x1.c8da01221306fp-6, -0x1.906b2c274c39cp-8},
+ {0x1.819d0b7158a4dp-1, 0x1.107fbbe01107cp-1, -0x1.0feeb40897d4ep-2,
+ 0x1.50e5afb95f5d6p-4, 0x1.2a7c2f0c7495dp-7, -0x1.12bd2bb5062cdp-5,
+ 0x1.93e8ceb89afebp-6, -0x1.10da9b8c6b731p-7},
+ {0x1.921fb54442d18p-1, 0x1.fffffffffffebp-2, -0x1.fffffffffcbbcp-3,
+ 0x1.555555564e2fep-4, -0x1.20b17d5dd89dcp-30, -0x1.9999c5ad71711p-6,
+ 0x1.5558b76e7aaf9p-6, -0x1.236e803c6c1f6p-7},
+};
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/inv_trigf_utils.h b/libc/src/math/generic/inv_trigf_utils.h
index 20f912de2ac008..308bdc44e3f8ac 100644
--- a/libc/src/math/generic/inv_trigf_utils.h
+++ b/libc/src/math/generic/inv_trigf_utils.h
@@ -9,85 +9,32 @@
#ifndef LLVM_LIBC_SRC_MATH_GENERIC_INV_TRIGF_UTILS_H
#define LLVM_LIBC_SRC_MATH_GENERIC_INV_TRIGF_UTILS_H
-#include "math_utils.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/common.h"
-#include <errno.h>
-
namespace LIBC_NAMESPACE {
// PI and PI / 2
constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
-// atan table size
-constexpr int ATAN_T_BITS = 4;
-constexpr int ATAN_T_SIZE = 1 << ATAN_T_BITS;
-
-// N[Table[ArcTan[x], {x, 1/8, 8/8, 1/8}], 40]
-extern const double ATAN_T[ATAN_T_SIZE];
-extern const double ATAN_K[5];
-
-// The main idea of the function is to use formula
-// atan(u) + atan(v) = atan((u+v)/(1-uv))
-
-// x should be positive, normal finite value
-LIBC_INLINE double atan_eval(double x) {
- using FPB = fputil::FPBits<double>;
- using Sign = fputil::Sign;
- // Added some small value to umin and umax mantissa to avoid possible rounding
- // errors.
- FPB::StorageType umin =
- FPB::create_value(Sign::POS, FPB::EXP_BIAS - ATAN_T_BITS - 1,
- 0x100000000000UL)
- .uintval();
- FPB::StorageType umax =
- FPB::create_value(Sign::POS, FPB::EXP_BIAS + ATAN_T_BITS,
- 0xF000000000000UL)
- .uintval();
-
- FPB bs(x);
- auto x_abs = bs.abs().uintval();
-
- if (x_abs <= umin) {
- double pe = LIBC_NAMESPACE::fputil::polyeval(
- x * x, 0.0, ATAN_K[1], ATAN_K[2], ATAN_K[3], ATAN_K[4]);
- return fputil::multiply_add(pe, x, x);
- }
-
- if (x_abs >= umax) {
- double one_over_x_m = -1.0 / x;
- double one_over_x2 = one_over_x_m * one_over_x_m;
- double pe = LIBC_NAMESPACE::fputil::polyeval(
- one_over_x2, ATAN_K[0], ATAN_K[1], ATAN_K[2], ATAN_K[3]);
- return fputil::multiply_add(pe, one_over_x_m,
- bs.is_neg() ? (-M_MATH_PI_2) : (M_MATH_PI_2));
- }
+extern double ATAN_COEFFS[17][8];
- double pos_x = FPB(x_abs).get_val();
- bool one_over_x = pos_x > 1.0;
- if (one_over_x) {
- pos_x = 1.0 / pos_x;
- }
+// For |x| <= 1/32 and 1 <= i <= 16, return Q(x) such that:
+// Q(x) ~ (atan(x + i/16) - atan(i/16)) / x.
+LIBC_INLINE constexpr double atan_eval(double x, int i) {
+ double x2 = x * x;
- double near_x = fputil::nearest_integer(pos_x * ATAN_T_SIZE);
- int val = static_cast<int>(near_x);
- near_x *= 1.0 / ATAN_T_SIZE;
+ double c0 = fputil::multiply_add(x, ATAN_COEFFS[i][2], ATAN_COEFFS[i][1]);
+ double c1 = fputil::multiply_add(x, ATAN_COEFFS[i][4], ATAN_COEFFS[i][3]);
+ double c2 = fputil::multiply_add(x, ATAN_COEFFS[i][6], ATAN_COEFFS[i][5]);
- double v = (pos_x - near_x) / fputil::multiply_add(near_x, pos_x, 1.0);
- double v2 = v * v;
- double pe = LIBC_NAMESPACE::fputil::polyeval(v2, ATAN_K[0], ATAN_K[1],
- ATAN_K[2], ATAN_K[3], ATAN_K[4]);
- double result;
- if (one_over_x)
- result = M_MATH_PI_2 - fputil::multiply_add(pe, v, ATAN_T[val - 1]);
- else
- result = fputil::multiply_add(pe, v, ATAN_T[val - 1]);
- return bs.is_neg() ? -result : result;
+ double x4 = x2 * x2;
+ double d1 = fputil::multiply_add(x2, c1, c0);
+ double d2 = fputil::multiply_add(x2, ATAN_COEFFS[i][7], c2);
+ double p = fputil::multiply_add(x4, d2, d1);
+ return p;
}
// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
@@ -99,7 +46,7 @@ constexpr double ASIN_COEFFS[10] = {0x1.5555555540fa1p-3, 0x1.333333512edc2p-4,
-0x1.df946fa875ddp-8, 0x1.02311ecf99c28p-5};
// Evaluate P(x^2) - 1, where P(x^2) ~ asin(x)/x
-LIBC_INLINE double asin_eval(double xsq) {
+LIBC_INLINE constexpr double asin_eval(double xsq) {
double x4 = xsq * xsq;
double r1 = fputil::polyeval(x4, ASIN_COEFFS[0], ASIN_COEFFS[2],
ASIN_COEFFS[4], ASIN_COEFFS[6], ASIN_COEFFS[8]);
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 9d62b1cf7ef730..026bcd12928bdf 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1645,20 +1645,6 @@ add_fp_unittest(
libc.src.__support.FPUtil.fp_bits
)
-add_fp_unittest(
- inv_trigf_utils_test
- NEED_MPFR
- SUITE
- libc-math-unittests
- HDRS
- in_float_range_test_helper.h
- SRCS
- inv_trigf_utils_test.cpp
- DEPENDS
- libc.src.math.generic.inv_trigf_utils
- libc.src.__support.FPUtil.fp_bits
-)
-
add_fp_unittest(
scalbn_test
NEED_MPFR
diff --git a/libc/test/src/math/atanf_test.cpp b/libc/test/src/math/atanf_test.cpp
index fdd69d3dc7914f..e51932fc495525 100644
--- a/libc/test/src/math/atanf_test.cpp
+++ b/libc/test/src/math/atanf_test.cpp
@@ -53,8 +53,15 @@ TEST_F(LlvmLibcAtanfTest, InFloatRange) {
// For small values, tanh(x) is x.
TEST_F(LlvmLibcAtanfTest, SpecialValues) {
- uint32_t val_arr[] = {0x3d8d6b23U, 0x3feefcfbU, 0xbd8d6b23U,
- 0xbfeefcfbU, 0x7F800000U, 0xFF800000U};
+ uint32_t val_arr[] = {
+ 0x3d8d6b23U, // x = 0x1.1ad646p-4f
+ 0x3feefcfbU, // x = 0x1.ddf9f6p+0f
+ 0xbd8d6b23U, // x = -0x1.1ad646p-4f
+ 0xbfeefcfbU, // x = -0x1.ddf9f6p+0f
+ 0x7F800000U, // x = +Inf
+ 0xFF800000U, // x = -Inf
+ 0xbffe2ec1U, // x = -0x1.fc5d82p+0f
+ };
for (uint32_t v : val_arr) {
float x = FPBits(v).get_val();
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Atan, x,
diff --git a/libc/test/src/math/exhaustive/atanf_test.cpp b/libc/test/src/math/exhaustive/atanf_test.cpp
index 508c288b050c55..6f23231d426741 100644
--- a/libc/test/src/math/exhaustive/atanf_test.cpp
+++ b/libc/test/src/math/exhaustive/atanf_test.cpp
@@ -25,7 +25,7 @@ TEST_F(LlvmLibcAtanfExhaustiveTest, PostiveRange) {
}
// Range: [-Inf, 0];
-static constexpr uint32_t NEG_START = 0xb000'0000U;
+static constexpr uint32_t NEG_START = 0x8000'0000U;
static constexpr uint32_t NEG_STOP = 0xff80'0000U;
TEST_F(LlvmLibcAtanfExhaustiveTest, NegativeRange) {
diff --git a/libc/test/src/math/inv_trigf_utils_test.cpp b/libc/test/src/math/inv_trigf_utils_test.cpp
deleted file mode 100644
index ebf21765b26d28..00000000000000
--- a/libc/test/src/math/inv_trigf_utils_test.cpp
+++ /dev/null
@@ -1,34 +0,0 @@
-//===-- Unittests for supfuncf --------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "in_float_range_test_helper.h"
-#include "include/llvm-libc-macros/math-macros.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/math/generic/inv_trigf_utils.h"
-#include "test/UnitTest/FPMatcher.h"
-#include "test/UnitTest/Test.h"
-#include "utils/MPFRWrapper/MPFRUtils.h"
-
-using LlvmLibcAtanfTest = LIBC_NAMESPACE::testing::FPTest<float>;
-
-namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
-
-constexpr int def_count = 100003;
-constexpr float def_prec = 0.500001f;
-
-auto f_normal = [](float x) -> bool { return !(isnan(x) || isinf(x)); };
-
-TEST_F(LlvmLibcAtanfTest, InPositiveRange) {
- CHECK_DATA(0.0f, inf, mpfr::Operation::Atan, LIBC_NAMESPACE::atan_eval,
- f_normal, def_count, def_prec);
-}
-
-TEST_F(LlvmLibcAtanfTest, InNegativeRange) {
- CHECK_DATA(-0.0f, neg_inf, mpfr::Operation::Atan, LIBC_NAMESPACE::atan_eval,
- f_normal, def_count, def_prec);
-}
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 0ec8e350281df9..01bb8b4298cdef 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -1252,13 +1252,9 @@ libc_support_library(
hdrs = ["src/math/generic/inv_trigf_utils.h"],
deps = [
":__support_common",
- ":__support_fputil_fenv_impl",
":__support_fputil_fma",
- ":__support_fputil_fp_bits",
":__support_fputil_multiply_add",
- ":__support_fputil_nearest_integer",
":__support_fputil_polyeval",
- ":math_utils",
],
)
More information about the libc-commits
mailing list