[libc-commits] [libc] [libc][math] Implement fast path for double precision hypot function. (PR #204292)

Tue Jun 16 23:10:40 PDT 2026

llvmorg-github-actions[bot] wrote:




@llvm/pr-subscribers-libc

Author: lntue

<details>
<summary>Changes</summary>

Compute `hypot(x, y) = sqrt(x^2 + y^2)` in double-double with relative errors < 2^-102.
Using the approximation:
```
  a = maximum_mag(x, y);
  b = minimum_mag(x, y);
  (hi, lo) ~ a^2 + b^2;
  r_hi = sqrt(hi);
  r_lo = (a + b - r_hi^2) * (0.5 / r_hi)
  ------
  r_hi + r_lo ~ sqrt(x^2 + y^2).
```

Performance benchmark with CORE-MATH:

Before the patch:
```
$ ./perf.sh hypot --llvm
GNU libc version: 2.42
GNU libc release: stable
-- CORE-MATH reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 23.183 + 0.311 clc/call; Median-Min = 0.319 clc/call; Max = 23.620 clc/call;
-- System LIBC reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 14.218 + 0.299 clc/call; Median-Min = 0.193 clc/call; Max = 15.240 clc/call;
-- LIBC reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 204.764 + 0.593 clc/call; Median-Min = 0.576 clc/call; Max = 206.057 clc/call;
```
After the patch without FMA:
```
-- LIBC reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 26.920 + 0.063 clc/call; Median-Min = 0.048 clc/call; Max = 27.139 clc/call;
```
After the patch with FMA:
```
-- LIBC reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 15.180 + 0.189 clc/call; Median-Min = 0.178 clc/call; Max = 15.461 clc/call;
```

---
Full diff: https://github.com/llvm/llvm-project/pull/204292.diff


2 Files Affected:

- (modified) libc/src/__support/FPUtil/double_double.h (+24-5) 
- (modified) libc/src/__support/math/hypot.h (+160-1) 


``````````diff

diff --git a/libc/src/__support/FPUtil/double_double.h b/libc/src/__support/FPUtil/double_double.h
index 31049ced5a934..87b3887716b0f 100644
--- a/libc/src/__support/FPUtil/double_double.h
+++ b/libc/src/__support/FPUtil/double_double.h
@@ -29,10 +29,26 @@ template <> struct DefaultSplit<double> {
 using DoubleDouble = NumberPair<double>;
 using FloatFloat = NumberPair<float>;
 
-// The output of Dekker's FastTwoSum algorithm is correct, i.e.:
-//   r.hi + r.lo = a + b exactly
-//   and |r.lo| < eps(r.lo)
-// Assumption: |a| >= |b|, or a = 0.
+// Dekker's FastTwoSum:
+//   r_hi + r_lo ~ a + b
+// In precision `p`, it's exact if `lsb(a) >= ulp(b)` and one of the following
+// conditions satisfies:
+//   (i)   rounding mode = RN
+//   (ii)  e_a - e_b <= p
+//   (iii) rounding mode = RD and b >= 0
+//   (iv)  rounding mode = RU and b <= 0
+//   (v)   rounding mode = RZ and ab >= 0
+// Otherwise, the errors err = (a + b) - (r_hi + r_lo) is bounded by:
+//   |err| <= 2^(-2p + 1) * ufp(a + b) <= 2^(-2p + 1) * ufp(r_hi)
+// when `lsb(a) >= ulp(b)`.
+// In particular,
+//   |err| <=  2^-47 * ufp(r_hi) for single precision,
+//         <= 2^-105 * ufp(r_hi) for double precision.
+// If the condition `lsb(a) >= ulp(b)` does NOT satisfy, then:
+//   |err| < 3 * 2^(-p) * |r_hi|.
+// Reference:
+//   Jeannerod, C.-P. and Zimmermann, P., "FastTwoSum revisited", ARITH 2025,
+//     <https://www.arith2025.org/proceedings/215900a141.pdf>.
 template <bool FAST2SUM = true, typename T = double>
 LIBC_INLINE constexpr NumberPair<T> exact_add(T a, T b) {
   NumberPair<T> r{0.0, 0.0};
@@ -51,7 +67,10 @@ LIBC_INLINE constexpr NumberPair<T> exact_add(T a, T b) {
   return r;
 }
 
-// Assumption: |a.hi| >= |b.hi|
+// Following the above analysis of FastTwoSum,
+// If `lsb(a.hi) >= ulp(b.hi)`, then the errors:
+//   err = (a.hi + a.lo + b.hi + b.lo) - (r.hi - r.lo) is bounded by:
+//   |err| < 2^(-2p) * ufp(r.hi) + 2^(-p + 2) * ufp(a.lo + b.lo).
 template <typename T>
 LIBC_INLINE constexpr NumberPair<T> add(const NumberPair<T> &a,
                                         const NumberPair<T> &b) {
diff --git a/libc/src/__support/math/hypot.h b/libc/src/__support/math/hypot.h
index 13d6e3fecff01..3df5ad1be0ceb 100644
--- a/libc/src/__support/math/hypot.h
+++ b/libc/src/__support/math/hypot.h
@@ -9,14 +9,173 @@
 #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_HYPOT_H
 #define LLVM_LIBC_SRC___SUPPORT_MATH_HYPOT_H
 
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/Hypot.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/FPUtil/sqrt.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
 
 namespace LIBC_NAMESPACE_DECL {
 namespace math {
 
-LIBC_INLINE double hypot(double x, double y) { return fputil::hypot(x, y); }
+LIBC_INLINE double hypot(double x, double y) {
+  using FPBits = fputil::FPBits<double>;
+  using DoubleDouble = fputil::DoubleDouble;
+
+  uint64_t x_u = FPBits(x).uintval();
+  uint64_t y_u = FPBits(y).uintval();
+
+  // Shift the exponent field to the top 11 bits of the lower 32-bit.
+  // Casting it to 32-bit effectively remove the sign bit.
+  uint32_t x_e = static_cast<uint32_t>(x_u >> 31);
+  uint32_t y_e = static_cast<uint32_t>(y_u >> 31);
+
+  // a = maximum_mag(x, y);
+  // b = minimum_mag(x, y);
+  double a, b;
+  uint32_t a_e, b_e;
+
+  if (x_e >= y_e) {
+    a_e = x_e;
+    b_e = y_e;
+    a = x;
+    b = y;
+  } else {
+    a_e = y_e;
+    b_e = x_e;
+    a = y;
+    b = x;
+  }
+
+  double scale = 1.0;
+  double scale_back = 1.0;
+
+  // For a_e, b_e, the top 11 bits are exponent fields.
+  if (LIBC_UNLIKELY(a_e >= ((500U + FPBits::EXP_BIAS) << (32 - 11)))) {
+    // The larger magnitude is above 2^500 (or Inf/NaN), need to scale down to
+    // prevent overflow when squaring.
+    if (a_e >= static_cast<uint32_t>(FPBits::EXP_MASK >> 31)) {
+      // Inf or NaN;
+      FPBits x_bits(x);
+      FPBits y_bits(y);
+      if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+      if (x_bits.is_inf() || y_bits.is_inf())
+        return FPBits::inf().get_val();
+      if (x_bits.is_nan())
+        return x;
+      return y;
+    }
+    //  Any scaling factor < 2^(-1024/2) = 2^-512 would work.
+    scale = 0x1.0p-600;
+    scale_back = 0x1.0p600;
+    a *= scale;
+    b *= scale;
+  } else if (LIBC_UNLIKELY(b_e <= ((FPBits::EXP_BIAS - 500) << (32 - 11)))) {
+    // The smaller magnitude is below 2^-500 (or 0), need to scale up to prevent
+    // underflow when squaring.
+    if ((x == 0.0) || (y == 0.0)) {
+      double x_abs = FPBits(x_u & FPBits::EXP_SIG_MASK).get_val();
+      double y_abs = FPBits(y_u & FPBits::EXP_SIG_MASK).get_val();
+      return x_abs + y_abs;
+    }
+    //  Any scaling factor > 2^((1072 + 52)/2) = 2^562 would work.
+    scale = 0x1.0p600;
+    scale_back = 0x1.0p-600;
+    a *= scale;
+    b *= scale;
+  }
+
+  // When the gap in the exponent of `a` and `b` is >= 54,
+  //   |b| < ufp(a) * 2^(-53) = ulp(a)/2
+  // So:
+  //   hypot(x, y) =  sqrt(a^2 + b^2)
+  //               <= sqrt( (|a| + |b|)^2 )
+  //               = |a| + |b|
+  //               < |a| + ulp(a)
+  //  Hence, we can return:
+  //    |a| + |b| = |x| + |y|
+  //  to perform correct rounding to all rounding modes.
+  if (LIBC_UNLIKELY(a_e - b_e >= (54U << (32 - 11)))) {
+    double x_abs = FPBits(x_u & FPBits::EXP_SIG_MASK).get_val();
+    double y_abs = FPBits(y_u & FPBits::EXP_SIG_MASK).get_val();
+    return x_abs + y_abs;
+  }
+
+  // sum.hi + sum.lo ~ a^2 + b^2.
+  DoubleDouble a_sq = fputil::exact_mult(a, a);
+  DoubleDouble b_sq = fputil::exact_mult(b, b);
+  DoubleDouble sum = fputil::exact_add(a_sq.hi, b_sq.hi);
+  sum.lo += a_sq.lo + b_sq.lo;
+
+  // Let hi = sum.hi and lo = sum.lo.
+  // To compute r_hi + r_lo ~ sqrt(hi + lo):
+  // - First we use fast sqrt instruction to get:
+  //     r_hi ~ sqrt(hi)
+  // - Then use Taylor expansion:
+  //     f(hi + lo) = f(hi) + f'(hi) * lo + f''(hi) * lo^2 / 2 + ...
+  //   with f(x) = sqrt(x):
+  //     sqrt(hi + lo) ~ sqrt(hi) + lo / (2 * sqrt(hi)).
+  // - Subtract by r_hi to find the correction term:
+  //     sqrt(hi + lo) - r_hi ~ (sqrt(hi) - r_hi) + lo / (2 * sqrt(hi))
+  // - Instead of finding the rounding errors sqrt(hi) - r_hi, we use the
+  //   squared residual d = hi - r_hi^2, which can be calculated accurately in
+  //   double-double.  Then, using the same Taylor approximation of sqrt(x) as
+  //   above:
+  //     sqrt(hi) - r_hi = sqrt(r_hi^2 + d) - r_hi
+  //                     ~ sqrt(r_hi^2) + d / (2 * sqrt(r_hi^2)) - r_hi
+  //                     = d / (2 * r_hi).
+  // - Similarly,
+  //     1 / sqrt(hi) = 1 / sqrt(r_hi^2 + d)
+  //                  ~ 1 / sqrt(r_hi^2) - d / (2 * (r_hi^2)^(3/2))
+  //                  = 1 / r_hi - d / (2 * r_hi^3)
+  // - Putting them together, we have the correction term:
+  //     sqrt(hi + lo) - r_hi + lo / (2 * sqrt(hi)) ~
+  //   ~ (lo + d) / (2 * r_hi) + lo * d / (4 * r_hi^3)
+  //   ~ (hi + lo - r_hi^2) / (2 * r_hi).
+  // - When computing hi + lo - r_hi^2, we will pair (hi - r_sq.hi) and
+  //   (lo - r_sq.lo), since `r_sq.hi` is very close to `hi`, and the
+  //   subtraction is exact.
+  // - Taking intermediate roundings with directed rounding modes into
+  //   consideration, the overall errors should be bounded by
+  //     (2^-51)^2 = 2^-102.
+
+  // |sqrt(sum.hi) - r_hi| < 2^-52.
+  double r_hi = fputil::sqrt<double>(sum.hi);
+  // r_inv ~ 1 / (2 * r_hi)
+  double r_inv = 0.5 / r_hi;
+  // r_hi^2
+  DoubleDouble r_sq = fputil::exact_mult(r_hi, r_hi);
+  // (hi + lo - r_hi^2)
+  double num_lo = (sum.lo - r_sq.lo) - (r_sq.hi - sum.hi);
+  // (hi + lo - r_hi^2) / (2 * r_hi)
+  double r_lo = num_lo * r_inv;
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  // TODO: What's the worst error if we just do:
+  //   return sqrt(a*a + b*b) * scale_back;
+  // without all the double-double computations?
+  return (r_hi + r_lo) * scale_back;
+#else
+  constexpr double ERR = 0x1.0p-102;
+
+  // Ziv's rounding test.
+  double upper = r_hi + fputil::multiply_add(r_hi, ERR, r_lo);
+  double lower = r_hi + fputil::multiply_add(r_hi, -ERR, r_lo);
+
+  if (LIBC_LIKELY(upper == lower)) {
+    return upper * scale_back;
+  }
+
+  return fputil::hypot(x, y);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+}
 
 } // namespace math
 } // namespace LIBC_NAMESPACE_DECL

``````````

</details>


https://github.com/llvm/llvm-project/pull/204292