[libc-commits] [PATCH] D118157: [libc] Improve hypotf performance with different algorithm correctly rounded to all rounding modes.

Fri Jan 28 15:14:56 PST 2022

lntue updated this revision to Diff 404181.
lntue added a comment.
Herald added a subscriber: mgorny.

Use fputil::sqrt instead of __builtin_sqrtf.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D118157/new/

https://reviews.llvm.org/D118157

Files:
  libc/src/math/generic/CMakeLists.txt
  libc/src/math/generic/hypotf.cpp
  libc/test/src/math/hypotf_hard_to_round.h


Index: libc/test/src/math/hypotf_hard_to_round.h
===================================================================

--- libc/test/src/math/hypotf_hard_to_round.h
+++ libc/test/src/math/hypotf_hard_to_round.h
@@ -13,8 +13,9 @@
 
 namespace mpfr = __llvm_libc::testing::mpfr;
 
-constexpr int N_HARD_TO_ROUND = 1216;
+constexpr int N_HARD_TO_ROUND = 1217;
 constexpr mpfr::BinaryInput<float> HYPOTF_HARD_TO_ROUND[N_HARD_TO_ROUND] = {
+    {0x1.faf49ep+25f, 0x1.480002p+23f},
     {0x1.ffffecp-1f, 0x1.000002p+27},
     {0x1.900004p+34, 0x1.400002p+23}, /* 45 identical bits */
     {0x1.05555p+34, 0x1.bffffep+23},  /* 44 identical bits */
Index: libc/src/math/generic/hypotf.cpp
===================================================================
--- libc/src/math/generic/hypotf.cpp
+++ libc/src/math/generic/hypotf.cpp
@@ -6,13 +6,57 @@
 //
 //===----------------------------------------------------------------------===//
 #include "src/math/hypotf.h"
-#include "src/__support/FPUtil/Hypot.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/sqrt.h"
 #include "src/__support/common.h"
 
 namespace __llvm_libc {
 
 LLVM_LIBC_FUNCTION(float, hypotf, (float x, float y)) {
-  return __llvm_libc::fputil::hypot(x, y);
+  using DoubleBits = fputil::FPBits<double>;
+  using FPBits = fputil::FPBits<float>;
+
+  double xd = static_cast<double>(x);
+  double yd = static_cast<double>(y);
+
+  // These squares are exact.
+  double xSq = xd * xd;
+  double ySq = yd * yd;
+
+  // Compute the sum of squares.
+  double sumSq = xSq + ySq;
+
+  // Compute the rounding error with Fast2Sum algorithm:
+  // xSq + ySq = sumSq - err
+  double err = (xSq >= ySq) ? (sumSq - xSq) - ySq : (sumSq - ySq) - xSq;
+
+  // Take sqrt in double precision.
+  DoubleBits result(fputil::sqrt(sumSq));
+
+  if (!DoubleBits(sumSq).is_inf_or_nan()) {
+    // Correct rounding.
+    double rSq = static_cast<double>(result) * static_cast<double>(result);
+    double diff = sumSq - rSq;
+    constexpr uint64_t mask = 0x0000'0000'3FFF'FFFFULL;
+    uint64_t lrs = result.uintval() & mask;
+
+    if (lrs == 0x0000'0000'1000'0000ULL && err < diff) {
+      result.bits |= 1ULL;
+    } else if (lrs == 0x0000'0000'3000'0000ULL && err > diff) {
+      result.bits -= 1ULL;
+    }
+  } else {
+    FPBits bits_x(x), bits_y(y);
+    if (bits_x.is_inf_or_nan() || bits_y.is_inf_or_nan()) {
+      if (bits_x.is_inf() || bits_y.is_inf())
+        return static_cast<float>(FPBits::inf());
+      if (bits_x.is_nan())
+        return x;
+      return y;
+    }
+  }
+
+  return static_cast<float>(static_cast<double>(result));
 }
 
 } // namespace __llvm_libc
Index: libc/src/math/generic/CMakeLists.txt
===================================================================
--- libc/src/math/generic/CMakeLists.txt
+++ libc/src/math/generic/CMakeLists.txt
@@ -973,6 +973,7 @@
     ../hypotf.h
   DEPENDS
     libc.src.__support.FPUtil.fputil
+    libc.src.__support.FPUtil.sqrt
   COMPILE_OPTIONS
     -O3
 )


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D118157.404181.patch
Type: text/x-patch
Size: 2998 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/libc-commits/attachments/20220128/886f8c4c/attachment.bin>