[libc-commits] [libc] f1ec99f - [libc] Improve hypotf performance with different algorithm correctly rounded to all rounding modes.
Tue Ly via libc-commits
libc-commits at lists.llvm.org
Wed Feb 16 06:49:09 PST 2022
Author: Tue Ly
Date: 2022-02-16T09:48:51-05:00
New Revision: f1ec99f973bdff66f81c3e00969d900ef2def081
URL: https://github.com/llvm/llvm-project/commit/f1ec99f973bdff66f81c3e00969d900ef2def081
DIFF: https://github.com/llvm/llvm-project/commit/f1ec99f973bdff66f81c3e00969d900ef2def081.diff
LOG: [libc] Improve hypotf performance with different algorithm correctly rounded to all rounding modes.
Algorithm for hypotf: compute (a*a + b*b) in double precision, then use Dekker's algorithm to find the rounding error, and then correcting it after taking its square-root.
Reviewed By: sivachandra
Differential Revision: https://reviews.llvm.org/D118157
Added:
libc/test/src/math/exhaustive/hypotf_test.cpp
Modified:
libc/src/math/generic/CMakeLists.txt
libc/src/math/generic/hypotf.cpp
libc/test/src/math/differential_testing/BinaryOpSingleOutputDiff.h
libc/test/src/math/exhaustive/CMakeLists.txt
libc/test/src/math/hypotf_hard_to_round.h
Removed:
################################################################################
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index c2cc79f2004dc..87770431adeb3 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -972,6 +972,7 @@ add_entrypoint_object(
../hypotf.h
DEPENDS
libc.src.__support.FPUtil.fputil
+ libc.src.__support.FPUtil.sqrt
COMPILE_OPTIONS
-O3
)
diff --git a/libc/src/math/generic/hypotf.cpp b/libc/src/math/generic/hypotf.cpp
index aa3b7cd43f319..ffc892ee566f4 100644
--- a/libc/src/math/generic/hypotf.cpp
+++ b/libc/src/math/generic/hypotf.cpp
@@ -6,13 +6,68 @@
//
//===----------------------------------------------------------------------===//
#include "src/math/hypotf.h"
-#include "src/__support/FPUtil/Hypot.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/sqrt.h"
#include "src/__support/common.h"
namespace __llvm_libc {
LLVM_LIBC_FUNCTION(float, hypotf, (float x, float y)) {
- return __llvm_libc::fputil::hypot(x, y);
+ using DoubleBits = fputil::FPBits<double>;
+ using FPBits = fputil::FPBits<float>;
+
+ FPBits x_bits(x), y_bits(y);
+
+ uint16_t x_exp = x_bits.get_unbiased_exponent();
+ uint16_t y_exp = y_bits.get_unbiased_exponent();
+ uint16_t exp_
diff = (x_exp > y_exp) ? (x_exp - y_exp) : (y_exp - x_exp);
+
+ if (exp_
diff >= fputil::MantissaWidth<float>::VALUE + 2) {
+ return fputil::abs(x) + fputil::abs(y);
+ }
+
+ double xd = static_cast<double>(x);
+ double yd = static_cast<double>(y);
+
+ // These squares are exact.
+ double x_sq = xd * xd;
+ double y_sq = yd * yd;
+
+ // Compute the sum of squares.
+ double sum_sq = x_sq + y_sq;
+
+ // Compute the rounding error with Fast2Sum algorithm:
+ // x_sq + y_sq = sum_sq - err
+ double err = (x_sq >= y_sq) ? (sum_sq - x_sq) - y_sq : (sum_sq - y_sq) - x_sq;
+
+ // Take sqrt in double precision.
+ DoubleBits result(fputil::sqrt(sum_sq));
+
+ if (!DoubleBits(sum_sq).is_inf_or_nan()) {
+ // Correct rounding.
+ double r_sq = static_cast<double>(result) * static_cast<double>(result);
+ double
diff = sum_sq - r_sq;
+ constexpr uint64_t mask = 0x0000'0000'3FFF'FFFFULL;
+ uint64_t lrs = result.uintval() & mask;
+
+ if (lrs == 0x0000'0000'1000'0000ULL && err <
diff ) {
+ result.bits |= 1ULL;
+ } else if (lrs == 0x0000'0000'3000'0000ULL && err >
diff ) {
+ result.bits -= 1ULL;
+ }
+ } else {
+ FPBits bits_x(x), bits_y(y);
+ if (bits_x.is_inf_or_nan() || bits_y.is_inf_or_nan()) {
+ if (bits_x.is_inf() || bits_y.is_inf())
+ return static_cast<float>(FPBits::inf());
+ if (bits_x.is_nan())
+ return x;
+ return y;
+ }
+ }
+
+ return static_cast<float>(static_cast<double>(result));
}
} // namespace __llvm_libc
diff --git a/libc/test/src/math/
diff erential_testing/BinaryOpSingleOutputDiff.h b/libc/test/src/math/
diff erential_testing/BinaryOpSingleOutputDiff.h
index 0fdfdf069aa93..2fa72af0795a8 100644
--- a/libc/test/src/math/
diff erential_testing/BinaryOpSingleOutputDiff.h
+++ b/libc/test/src/math/
diff erential_testing/BinaryOpSingleOutputDiff.h
@@ -78,6 +78,11 @@ template <typename T> class BinaryOpSingleOutputDiff {
log << "\n Performance tests with inputs in normal range:\n";
run_perf_in_range(myFunc, otherFunc, /* startingBit= */ FPBits::MIN_NORMAL,
/* endingBit= */ FPBits::MAX_NORMAL, 100'000'001, log);
+ log << "\n Performance tests with inputs in normal range with exponents "
+ "close to each other:\n";
+ run_perf_in_range(
+ myFunc, otherFunc, /* startingBit= */ FPBits(T(0x1.0p-10)).uintval(),
+ /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(), 10'000'001, log);
}
};
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
index 56931a0340656..f02c3f7d4744a 100644
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -129,3 +129,22 @@ add_fp_unittest(
LINK_OPTIONS
-lpthread
)
+
+add_fp_unittest(
+ hypotf_test
+ NO_RUN_POSTBUILD
+ NEED_MPFR
+ SUITE
+ libc_math_exhaustive_tests
+ SRCS
+ hypotf_test.cpp
+ DEPENDS
+ .exhaustive_test
+ libc.include.math
+ libc.src.math.hypotf
+ libc.src.__support.FPUtil.fputil
+ COMPILE_OPTIONS
+ -O3
+ LINK_OPTIONS
+ -lpthread
+)
diff --git a/libc/test/src/math/exhaustive/hypotf_test.cpp b/libc/test/src/math/exhaustive/hypotf_test.cpp
new file mode 100644
index 0000000000000..34ce5b0ab8f85
--- /dev/null
+++ b/libc/test/src/math/exhaustive/hypotf_test.cpp
@@ -0,0 +1,65 @@
+//===-- Exhaustive test for hypotf ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/Hypot.h"
+#include "src/math/hypotf.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+#include "utils/UnitTest/FPMatcher.h"
+
+using FPBits = __llvm_libc::fputil::FPBits<float>;
+
+namespace mpfr = __llvm_libc::testing::mpfr;
+
+struct LlvmLibcHypotfExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
+ void check(uint32_t start, uint32_t stop,
+ mpfr::RoundingMode rounding) override {
+ // Range of the second input: [2^37, 2^48).
+ constexpr uint32_t Y_START = (37U + 127U) << 23;
+ constexpr uint32_t Y_STOP = (48U + 127U) << 23;
+
+ mpfr::ForceRoundingMode r(rounding);
+ uint32_t xbits = start;
+ do {
+ float x = float(FPBits(xbits));
+ uint32_t ybits = Y_START;
+ do {
+ float y = float(FPBits(ybits));
+ EXPECT_FP_EQ(__llvm_libc::fputil::hypot(x, y),
+ __llvm_libc::hypotf(x, y));
+ // Using MPFR will be much slower.
+ // mpfr::BinaryInput<float> input{x, y};
+ // EXPECT_MPFR_MATCH(mpfr::Operation::Hypot, input,
+ // __llvm_libc::hypotf(x, y), 0.5,
+ // rounding);
+ } while (ybits++ < Y_STOP);
+ } while (xbits++ < stop);
+ }
+};
+
+// Range of the first input: [2^23, 2^24);
+static constexpr uint32_t START = (23U + 127U) << 23;
+static constexpr uint32_t STOP = ((23U + 127U) << 23) + 1;
+static constexpr int NUM_THREADS = 1;
+
+TEST_F(LlvmLibcHypotfExhaustiveTest, RoundNearestTieToEven) {
+ test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::Nearest);
+}
+
+TEST_F(LlvmLibcHypotfExhaustiveTest, RoundUp) {
+ test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
+}
+
+TEST_F(LlvmLibcHypotfExhaustiveTest, RoundDown) {
+ test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::Downward);
+}
+
+TEST_F(LlvmLibcHypotfExhaustiveTest, RoundTowardZero) {
+ test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::TowardZero);
+}
diff --git a/libc/test/src/math/hypotf_hard_to_round.h b/libc/test/src/math/hypotf_hard_to_round.h
index 06817c7a3a104..1b2abfd2fb909 100644
--- a/libc/test/src/math/hypotf_hard_to_round.h
+++ b/libc/test/src/math/hypotf_hard_to_round.h
@@ -13,8 +13,9 @@
namespace mpfr = __llvm_libc::testing::mpfr;
-constexpr int N_HARD_TO_ROUND = 1216;
+constexpr int N_HARD_TO_ROUND = 1217;
constexpr mpfr::BinaryInput<float> HYPOTF_HARD_TO_ROUND[N_HARD_TO_ROUND] = {
+ {0x1.faf49ep+25f, 0x1.480002p+23f},
{0x1.ffffecp-1f, 0x1.000002p+27},
{0x1.900004p+34, 0x1.400002p+23}, /* 45 identical bits */
{0x1.05555p+34, 0x1.bffffep+23}, /* 44 identical bits */
More information about the libc-commits
mailing list