[libc-commits] [libc] [libc][math] Implement atan2f correctly rounded to all rounding modes. (PR #86716)
Nick Desaulniers via libc-commits
libc-commits at lists.llvm.org
Fri Mar 29 09:14:42 PDT 2024
================
@@ -0,0 +1,299 @@
+//===-- Single-precision atan2f function ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/atan2f.h"
+#include "inv_trigf_utils.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE {
+
+namespace {
+
+// Look up tables for accurate pass:
+
+// atan(i/16) with i = 0..16, generated by Sollya with:
+// > for i from 0 to 16 do {
+// a = round(atan(i/16), D, RN);
+// b = round(atan(i/16) - a, D, RN);
+// print("{", b, ",", a, "},");
+// };
+constexpr fputil::DoubleDouble ATAN_I[17] = {
+ {0.0, 0.0},
+ {-0x1.c934d86d23f1dp-60, 0x1.ff55bb72cfdeap-5},
+ {-0x1.cd37686760c17p-59, 0x1.fd5ba9aac2f6ep-4},
+ {0x1.347b0b4f881cap-58, 0x1.7b97b4bce5b02p-3},
+ {0x1.8ab6e3cf7afbdp-57, 0x1.f5b75f92c80ddp-3},
+ {-0x1.963a544b672d8p-57, 0x1.362773707ebccp-2},
+ {-0x1.c63aae6f6e918p-56, 0x1.6f61941e4def1p-2},
+ {-0x1.24dec1b50b7ffp-56, 0x1.a64eec3cc23fdp-2},
+ {0x1.a2b7f222f65e2p-56, 0x1.dac670561bb4fp-2},
+ {-0x1.d5b495f6349e6p-56, 0x1.0657e94db30dp-1},
+ {-0x1.928df287a668fp-58, 0x1.1e00babdefeb4p-1},
+ {0x1.1021137c71102p-55, 0x1.345f01cce37bbp-1},
+ {0x1.2419a87f2a458p-56, 0x1.4978fa3269ee1p-1},
+ {0x1.0028e4bc5e7cap-57, 0x1.5d58987169b18p-1},
+ {-0x1.8c34d25aadef6p-56, 0x1.700a7c5784634p-1},
+ {-0x1.bf76229d3b917p-56, 0x1.819d0b7158a4dp-1},
+ {0x1.1a62633145c07p-55, 0x1.921fb54442d18p-1},
+};
+
+// Taylor polynomial, generated by Sollya with:
+// > for i from 0 to 8 do {
+// j = (-1)^(i + 1)/(2*i + 1);
+// a = round(j, D, RN);
+// b = round(j - a, D, RN);
+// print("{", b, ",", a, "},");
+// };
+constexpr fputil::DoubleDouble COEFFS[9] = {
+ {0.0, 1.0}, // 1
+ {-0x1.5555555555555p-56, -0x1.5555555555555p-2}, // -1/3
+ {-0x1.999999999999ap-57, 0x1.999999999999ap-3}, // 1/5
+ {-0x1.2492492492492p-57, -0x1.2492492492492p-3}, // -1/7
+ {0x1.c71c71c71c71cp-58, 0x1.c71c71c71c71cp-4}, // 1/9
+ {0x1.745d1745d1746p-59, -0x1.745d1745d1746p-4}, // -1/11
+ {-0x1.3b13b13b13b14p-58, 0x1.3b13b13b13b14p-4}, // 1/13
+ {-0x1.1111111111111p-60, -0x1.1111111111111p-4}, // -1/15
+ {0x1.e1e1e1e1e1e1ep-61, 0x1.e1e1e1e1e1e1ep-5}, // 1/17
+};
+
+// Veltkamp's splitting of a double precision into hi + lo, where the hi part is
+// slightly smaller than an even split, so that the product of
+// hi * s * k is exact,
+// where:
+// s is single precsion,
+// 0 < k < 16 is an integer.
+// This is used when FMA instruction is not available.
+[[maybe_unused]] LIBC_INLINE constexpr fputil::DoubleDouble split_d(double a) {
----------------
nickdesaulniers wrote:
That's not the semantics of this keyword. https://gist.github.com/nico/93df46db7f5e1d0bd941d63663db95b1
The `inline` keyword is much more watered down than its name implies.
Simply by putting it in an anonymous namespace outside of a header (so that it's no longer externally visible) and having only one call site (and enabling any optimization level), the inline cost model (in LLVM) will provide an near-infinite discount to the "cost" of inlining.
https://godbolt.org/z/c53vb4KEr
Once inlined, the definition no longer has any callers and is dead code eliminated.
You can verify this via:
```diff
diff --git a/libc/src/math/generic/atan2f.cpp b/libc/src/math/generic/atan2f.cpp
index d50ac6608836..c8e943d13466 100644
--- a/libc/src/math/generic/atan2f.cpp
+++ b/libc/src/math/generic/atan2f.cpp
@@ -81,7 +81,7 @@ constexpr fputil::DoubleDouble COEFFS[9] = {
// = 33.
// Thus, the Veltkamp splitting constant is C = 2^33 + 1.
// This is used when FMA instruction is not available.
-[[maybe_unused]] LIBC_INLINE constexpr fputil::DoubleDouble split_d(double a) {
+[[maybe_unused]] constexpr fputil::DoubleDouble split_d(double a) {
fputil::DoubleDouble r{0.0, 0.0};
constexpr double C = 0x1.0p33 + 1.0;
double t1 = C * a;
@@ -98,7 +98,7 @@ constexpr fputil::DoubleDouble COEFFS[9] = {
// idx, k_d = round( 2^4 * num_d / den_d )
// final_sign = sign of the final result
// const_term = the constant term in the final expression.
-LIBC_INLINE float atan2f_double_double(double num_d, double den_d, double q_d,
+float atan2f_double_double(double num_d, double den_d, double q_d,
int idx, double k_d, double final_sign,
const fputil::DoubleDouble &const_term) {
fputil::DoubleDouble q;
```
```
$ ninja libc
$ llvm-nm ./libc/src/math/generic/CMakeFiles/libc.src.math.generic.atan2f.dir/atan2f.cpp.o | llvm-cxxfilt | grep -e split_d -e atan2f_double_double
$ llvm-objdump -dr ./libc/src/math/generic/CMakeFiles/libc.src.math.generic.atan2f.dir/atan2f.cpp.o | llvm-cxxfilt | grep call -A1
2a6: e8 00 00 00 00 callq 0x2ab <atan2f+0x2ab>
< call to polyeval >
$
```
---
If you want stronger guarantees wrt. inline substitution, you must use the function attributes always_inline or noinline. Those are heavy handed, and I don't recommend using them until one has benchmarks or disassembly showing they're not getting inlining and need it. (I've seen codebases put always_inline on everything, at the extreme cost to compile time and resulting code size). They should not be necessary here.
`inline` has interesting semantics when used with `static` or `extern` on definitions in a header. There's also a function attribute `gnu_inline`, changes to `extern inline` in C99 (and newer) modes, and a `-f` flag for `gnu_inline`/C90 `extern inline` semantics.
https://github.com/llvm/llvm-project/pull/86716
More information about the libc-commits
mailing list