[libc-commits] [libc] f1ec99f - [libc] Improve hypotf performance with different algorithm correctly rounded to all rounding modes.

Wed Feb 16 06:49:09 PST 2022

Author: Tue Ly
Date: 2022-02-16T09:48:51-05:00
New Revision: f1ec99f973bdff66f81c3e00969d900ef2def081

URL: https://github.com/llvm/llvm-project/commit/f1ec99f973bdff66f81c3e00969d900ef2def081
DIFF: https://github.com/llvm/llvm-project/commit/f1ec99f973bdff66f81c3e00969d900ef2def081.diff

LOG: [libc] Improve hypotf performance with different algorithm correctly rounded to all rounding modes.

Algorithm for hypotf: compute (a*a + b*b) in double precision, then use Dekker's algorithm to find the rounding error, and then correcting it after taking its square-root.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D118157

Added: 
    libc/test/src/math/exhaustive/hypotf_test.cpp

Modified: 
    libc/src/math/generic/CMakeLists.txt
    libc/src/math/generic/hypotf.cpp
    libc/test/src/math/differential_testing/BinaryOpSingleOutputDiff.h
    libc/test/src/math/exhaustive/CMakeLists.txt
    libc/test/src/math/hypotf_hard_to_round.h

Removed: 
    


################################################################################
diff  --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index c2cc79f2004dc..87770431adeb3 100644

--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -972,6 +972,7 @@ add_entrypoint_object(
     ../hypotf.h
   DEPENDS
     libc.src.__support.FPUtil.fputil
+    libc.src.__support.FPUtil.sqrt
   COMPILE_OPTIONS
     -O3
 )

diff  --git a/libc/src/math/generic/hypotf.cpp b/libc/src/math/generic/hypotf.cpp
index aa3b7cd43f319..ffc892ee566f4 100644
--- a/libc/src/math/generic/hypotf.cpp
+++ b/libc/src/math/generic/hypotf.cpp
@@ -6,13 +6,68 @@
 //
 //===----------------------------------------------------------------------===//
 #include "src/math/hypotf.h"
-#include "src/__support/FPUtil/Hypot.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/sqrt.h"
 #include "src/__support/common.h"
 
 namespace __llvm_libc {
 
 LLVM_LIBC_FUNCTION(float, hypotf, (float x, float y)) {
-  return __llvm_libc::fputil::hypot(x, y);
+  using DoubleBits = fputil::FPBits<double>;
+  using FPBits = fputil::FPBits<float>;
+
+  FPBits x_bits(x), y_bits(y);
+
+  uint16_t x_exp = x_bits.get_unbiased_exponent();
+  uint16_t y_exp = y_bits.get_unbiased_exponent();
+  uint16_t exp_
diff  = (x_exp > y_exp) ? (x_exp - y_exp) : (y_exp - x_exp);
+
+  if (exp_
diff  >= fputil::MantissaWidth<float>::VALUE + 2) {
+    return fputil::abs(x) + fputil::abs(y);
+  }
+
+  double xd = static_cast<double>(x);
+  double yd = static_cast<double>(y);
+
+  // These squares are exact.
+  double x_sq = xd * xd;
+  double y_sq = yd * yd;
+
+  // Compute the sum of squares.
+  double sum_sq = x_sq + y_sq;
+
+  // Compute the rounding error with Fast2Sum algorithm:
+  // x_sq + y_sq = sum_sq - err
+  double err = (x_sq >= y_sq) ? (sum_sq - x_sq) - y_sq : (sum_sq - y_sq) - x_sq;
+
+  // Take sqrt in double precision.
+  DoubleBits result(fputil::sqrt(sum_sq));
+
+  if (!DoubleBits(sum_sq).is_inf_or_nan()) {
+    // Correct rounding.
+    double r_sq = static_cast<double>(result) * static_cast<double>(result);
+    double 
diff  = sum_sq - r_sq;
+    constexpr uint64_t mask = 0x0000'0000'3FFF'FFFFULL;
+    uint64_t lrs = result.uintval() & mask;
+
+    if (lrs == 0x0000'0000'1000'0000ULL && err < 
diff ) {
+      result.bits |= 1ULL;
+    } else if (lrs == 0x0000'0000'3000'0000ULL && err > 
diff ) {
+      result.bits -= 1ULL;
+    }
+  } else {
+    FPBits bits_x(x), bits_y(y);
+    if (bits_x.is_inf_or_nan() || bits_y.is_inf_or_nan()) {
+      if (bits_x.is_inf() || bits_y.is_inf())
+        return static_cast<float>(FPBits::inf());
+      if (bits_x.is_nan())
+        return x;
+      return y;
+    }
+  }
+
+  return static_cast<float>(static_cast<double>(result));
 }
 
 } // namespace __llvm_libc

diff  --git a/libc/test/src/math/
diff erential_testing/BinaryOpSingleOutputDiff.h b/libc/test/src/math/
diff erential_testing/BinaryOpSingleOutputDiff.h
index 0fdfdf069aa93..2fa72af0795a8 100644
--- a/libc/test/src/math/
diff erential_testing/BinaryOpSingleOutputDiff.h
+++ b/libc/test/src/math/
diff erential_testing/BinaryOpSingleOutputDiff.h
@@ -78,6 +78,11 @@ template <typename T> class BinaryOpSingleOutputDiff {
     log << "\n Performance tests with inputs in normal range:\n";
     run_perf_in_range(myFunc, otherFunc, /* startingBit= */ FPBits::MIN_NORMAL,
                       /* endingBit= */ FPBits::MAX_NORMAL, 100'000'001, log);
+    log << "\n Performance tests with inputs in normal range with exponents "
+           "close to each other:\n";
+    run_perf_in_range(
+        myFunc, otherFunc, /* startingBit= */ FPBits(T(0x1.0p-10)).uintval(),
+        /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(), 10'000'001, log);
   }
 };
 

diff  --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
index 56931a0340656..f02c3f7d4744a 100644
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -129,3 +129,22 @@ add_fp_unittest(
   LINK_OPTIONS
     -lpthread
 )
+
+add_fp_unittest(
+  hypotf_test
+  NO_RUN_POSTBUILD
+  NEED_MPFR
+  SUITE
+    libc_math_exhaustive_tests
+  SRCS
+    hypotf_test.cpp
+  DEPENDS
+    .exhaustive_test
+    libc.include.math
+    libc.src.math.hypotf
+    libc.src.__support.FPUtil.fputil
+  COMPILE_OPTIONS
+    -O3
+  LINK_OPTIONS
+    -lpthread
+)

diff  --git a/libc/test/src/math/exhaustive/hypotf_test.cpp b/libc/test/src/math/exhaustive/hypotf_test.cpp
new file mode 100644
index 0000000000000..34ce5b0ab8f85
--- /dev/null
+++ b/libc/test/src/math/exhaustive/hypotf_test.cpp
@@ -0,0 +1,65 @@
+//===-- Exhaustive test for hypotf ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/Hypot.h"
+#include "src/math/hypotf.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+#include "utils/UnitTest/FPMatcher.h"
+
+using FPBits = __llvm_libc::fputil::FPBits<float>;
+
+namespace mpfr = __llvm_libc::testing::mpfr;
+
+struct LlvmLibcHypotfExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
+  void check(uint32_t start, uint32_t stop,
+             mpfr::RoundingMode rounding) override {
+    // Range of the second input: [2^37, 2^48).
+    constexpr uint32_t Y_START = (37U + 127U) << 23;
+    constexpr uint32_t Y_STOP = (48U + 127U) << 23;
+
+    mpfr::ForceRoundingMode r(rounding);
+    uint32_t xbits = start;
+    do {
+      float x = float(FPBits(xbits));
+      uint32_t ybits = Y_START;
+      do {
+        float y = float(FPBits(ybits));
+        EXPECT_FP_EQ(__llvm_libc::fputil::hypot(x, y),
+                     __llvm_libc::hypotf(x, y));
+        // Using MPFR will be much slower.
+        // mpfr::BinaryInput<float> input{x, y};
+        // EXPECT_MPFR_MATCH(mpfr::Operation::Hypot, input,
+        // __llvm_libc::hypotf(x, y), 0.5,
+        //                   rounding);
+      } while (ybits++ < Y_STOP);
+    } while (xbits++ < stop);
+  }
+};
+
+// Range of the first input: [2^23, 2^24);
+static constexpr uint32_t START = (23U + 127U) << 23;
+static constexpr uint32_t STOP = ((23U + 127U) << 23) + 1;
+static constexpr int NUM_THREADS = 1;
+
+TEST_F(LlvmLibcHypotfExhaustiveTest, RoundNearestTieToEven) {
+  test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::Nearest);
+}
+
+TEST_F(LlvmLibcHypotfExhaustiveTest, RoundUp) {
+  test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
+}
+
+TEST_F(LlvmLibcHypotfExhaustiveTest, RoundDown) {
+  test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::Downward);
+}
+
+TEST_F(LlvmLibcHypotfExhaustiveTest, RoundTowardZero) {
+  test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::TowardZero);
+}

diff  --git a/libc/test/src/math/hypotf_hard_to_round.h b/libc/test/src/math/hypotf_hard_to_round.h
index 06817c7a3a104..1b2abfd2fb909 100644
--- a/libc/test/src/math/hypotf_hard_to_round.h
+++ b/libc/test/src/math/hypotf_hard_to_round.h
@@ -13,8 +13,9 @@
 
 namespace mpfr = __llvm_libc::testing::mpfr;
 
-constexpr int N_HARD_TO_ROUND = 1216;
+constexpr int N_HARD_TO_ROUND = 1217;
 constexpr mpfr::BinaryInput<float> HYPOTF_HARD_TO_ROUND[N_HARD_TO_ROUND] = {
+    {0x1.faf49ep+25f, 0x1.480002p+23f},
     {0x1.ffffecp-1f, 0x1.000002p+27},
     {0x1.900004p+34, 0x1.400002p+23}, /* 45 identical bits */
     {0x1.05555p+34, 0x1.bffffep+23},  /* 44 identical bits */