[libc-commits] [PATCH] D118157: [libc] Improve hypotf performance with a different algorithm that is correctly rounded in all rounding modes.
Tue Ly via Phabricator via libc-commits
libc-commits at lists.llvm.org
Fri Jan 28 15:21:42 PST 2022
lntue updated this revision to Diff 404184.
lntue added a comment.
Fix variable names and ignore compiler warnings about C++17.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D118157/new/
https://reviews.llvm.org/D118157
Files:
libc/src/math/generic/CMakeLists.txt
libc/src/math/generic/hypotf.cpp
libc/test/src/math/hypotf_hard_to_round.h
Index: libc/test/src/math/hypotf_hard_to_round.h
===================================================================
--- libc/test/src/math/hypotf_hard_to_round.h
+++ libc/test/src/math/hypotf_hard_to_round.h
@@ -13,8 +13,9 @@
namespace mpfr = __llvm_libc::testing::mpfr;
-constexpr int N_HARD_TO_ROUND = 1216;
+constexpr int N_HARD_TO_ROUND = 1217;
constexpr mpfr::BinaryInput<float> HYPOTF_HARD_TO_ROUND[N_HARD_TO_ROUND] = {
+ {0x1.faf49ep+25f, 0x1.480002p+23f},
{0x1.ffffecp-1f, 0x1.000002p+27},
{0x1.900004p+34, 0x1.400002p+23}, /* 45 identical bits */
{0x1.05555p+34, 0x1.bffffep+23}, /* 44 identical bits */
Index: libc/src/math/generic/hypotf.cpp
===================================================================
--- libc/src/math/generic/hypotf.cpp
+++ libc/src/math/generic/hypotf.cpp
@@ -6,13 +6,57 @@
//
//===----------------------------------------------------------------------===//
#include "src/math/hypotf.h"
-#include "src/__support/FPUtil/Hypot.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/sqrt.h"
#include "src/__support/common.h"
namespace __llvm_libc {
LLVM_LIBC_FUNCTION(float, hypotf, (float x, float y)) { // new body: work in double, then narrow to float
- return __llvm_libc::fputil::hypot(x, y);
+ using DoubleBits = fputil::FPBits<double>;
+ using FPBits = fputil::FPBits<float>;
+
+ double xd = static_cast<double>(x);
+ double yd = static_cast<double>(y);
+
+ // Square both inputs in double. Per the patch author these squares are exact (presumably a float square fits in a double significand - confirm).
+ double x_sq = xd * xd;
+ double y_sq = yd * yd;
+
+ // Compute the sum of squares; unlike the squares, this addition can round, which err below captures.
+ double sum_sq = x_sq + y_sq;
+
+ // Compute the rounding error of that addition with Fast2Sum algorithm (the larger square is subtracted first):
+ // x_sq + y_sq = sum_sq - err
+ double err = (x_sq >= y_sq) ? (sum_sq - x_sq) - y_sq : (sum_sq - y_sq) - x_sq;
+
+ // Take sqrt of the rounded sum in double precision.
+ DoubleBits result(fputil::sqrt(sum_sq));
+
+ if (!DoubleBits(sum_sq).is_inf_or_nan()) { // sum_sq is finite
+ // Correct rounding: when the low 30 bits of result match one of two patterns (NOTE(review): these look like exact float halfway points - confirm), compare err with diff to nudge result by one bit before the final cast to float.
+ double r_sq = static_cast<double>(result) * static_cast<double>(result);
+ double diff = sum_sq - r_sq;
+ constexpr uint64_t mask = 0x0000'0000'3FFF'FFFFULL;
+ uint64_t lrs = result.uintval() & mask;
+
+ if (lrs == 0x0000'0000'1000'0000ULL && err < diff) {
+ result.bits |= 1ULL; // set the lowest bit of the double bit pattern
+ } else if (lrs == 0x0000'0000'3000'0000ULL && err > diff) {
+ result.bits -= 1ULL; // step back to the previous bit pattern
+ }
+ } else { // sum_sq is inf or NaN
+ FPBits bits_x(x), bits_y(y);
+ if (bits_x.is_inf_or_nan() || bits_y.is_inf_or_nan()) { // otherwise fall through to the final return
+ if (bits_x.is_inf() || bits_y.is_inf())
+ return static_cast<float>(FPBits::inf()); // inf is checked before NaN, so an infinite operand wins
+ if (bits_x.is_nan())
+ return x; // x is NaN: return it unchanged
+ return y; // neither is inf and x is not NaN, so y is the NaN operand
+ }
+ }
+
+ return static_cast<float>(static_cast<double>(result)); // narrow the (possibly adjusted) double to float
}
} // namespace __llvm_libc
Index: libc/src/math/generic/CMakeLists.txt
===================================================================
--- libc/src/math/generic/CMakeLists.txt
+++ libc/src/math/generic/CMakeLists.txt
@@ -973,8 +973,10 @@
../hypotf.h
DEPENDS
libc.src.__support.FPUtil.fputil
+ libc.src.__support.FPUtil.sqrt
COMPILE_OPTIONS
-O3
+ -Wno-c++17-extensions
)
add_entrypoint_object(
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D118157.404184.patch
Type: text/x-patch
Size: 3073 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/libc-commits/attachments/20220128/9a29fd57/attachment.bin>
More information about the libc-commits mailing list