[libc] [llvm] [libc][math] Refactor exp10 implementation to header-only in src/__support/math folder. (PR #148400)

via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 12 14:43:04 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libc

Author: Muhammad Bassiouni (bassiounix)

<details>
<summary>Changes</summary>

Part of #<!-- -->147386

in preparation for: https://discourse.llvm.org/t/rfc-make-clang-builtin-math-functions-constexpr-with-llvm-libc-to-support-c-23-constexpr-math-functions/86450

Please merge #<!-- -->148091 first

@<!-- -->lntue

---

Patch is 121.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148400.diff


25 Files Affected:

- (modified) libc/shared/math.h (+5) 
- (added) libc/shared/math/exp.h (+23) 
- (added) libc/shared/math/exp10.h (+23) 
- (added) libc/shared/math/ldexpf.h (+23) 
- (added) libc/shared/math/ldexpf128.h (+29) 
- (added) libc/shared/math/ldexpf16.h (+31) 
- (modified) libc/src/__support/FPUtil/double_double.h (+2-2) 
- (modified) libc/src/__support/math/CMakeLists.txt (+88) 
- (added) libc/src/__support/math/exp.h (+448) 
- (added) libc/src/__support/math/exp10.h (+505) 
- (added) libc/src/__support/math/exp_constants.h (+174) 
- (added) libc/src/__support/math/exp_utils.h (+72) 
- (added) libc/src/__support/math/ldexpf.h (+28) 
- (added) libc/src/__support/math/ldexpf128.h (+34) 
- (added) libc/src/__support/math/ldexpf16.h (+34) 
- (modified) libc/src/math/generic/CMakeLists.txt (+7-38) 
- (modified) libc/src/math/generic/common_constants.cpp (-157) 
- (modified) libc/src/math/generic/common_constants.h (+1-6) 
- (modified) libc/src/math/generic/exp.cpp (+2-427) 
- (modified) libc/src/math/generic/exp10.cpp (+2-483) 
- (modified) libc/src/math/generic/explogxf.h (+2-55) 
- (modified) libc/src/math/generic/ldexpf.cpp (+2-4) 
- (modified) libc/src/math/generic/ldexpf128.cpp (+3-4) 
- (modified) libc/src/math/generic/ldexpf16.cpp (+3-4) 
- (modified) utils/bazel/llvm-project-overlay/libc/BUILD.bazel (+110-27) 


``````````diff
diff --git a/libc/shared/math.h b/libc/shared/math.h
index e2950e075a81d..b37aa46820523 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -11,10 +11,15 @@
 
 #include "libc_common.h"
 
+#include "math/exp.h"
+#include "math/exp10.h"
 #include "math/expf.h"
 #include "math/expf16.h"
 #include "math/frexpf.h"
 #include "math/frexpf128.h"
 #include "math/frexpf16.h"
+#include "math/ldexpf.h"
+#include "math/ldexpf128.h"
+#include "math/ldexpf16.h"
 
 #endif // LLVM_LIBC_SHARED_MATH_H
diff --git a/libc/shared/math/exp.h b/libc/shared/math/exp.h
new file mode 100644
index 0000000000000..7cdd6331e613a
--- /dev/null
+++ b/libc/shared/math/exp.h
@@ -0,0 +1,23 @@
+//===-- Shared exp function -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_EXP_H
+#define LLVM_LIBC_SHARED_MATH_EXP_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/exp.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::exp;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_EXP_H
diff --git a/libc/shared/math/exp10.h b/libc/shared/math/exp10.h
new file mode 100644
index 0000000000000..3d36d9103705f
--- /dev/null
+++ b/libc/shared/math/exp10.h
@@ -0,0 +1,23 @@
+//===-- Shared exp10 function -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_EXP10_H
+#define LLVM_LIBC_SHARED_MATH_EXP10_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/exp10.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::exp10;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_EXP10_H
diff --git a/libc/shared/math/ldexpf.h b/libc/shared/math/ldexpf.h
new file mode 100644
index 0000000000000..497933c47321f
--- /dev/null
+++ b/libc/shared/math/ldexpf.h
@@ -0,0 +1,23 @@
+//===-- Shared ldexpf function ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_LDEXPF_H
+#define LLVM_LIBC_SHARED_MATH_LDEXPF_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/ldexpf.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::ldexpf;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_LDEXPF_H
diff --git a/libc/shared/math/ldexpf128.h b/libc/shared/math/ldexpf128.h
new file mode 100644
index 0000000000000..d4066beb809c7
--- /dev/null
+++ b/libc/shared/math/ldexpf128.h
@@ -0,0 +1,29 @@
+//===-- Shared ldexpf128 function -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_LDEXPF128_H
+#define LLVM_LIBC_SHARED_MATH_LDEXPF128_H
+
+#include "include/llvm-libc-types/float128.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT128
+
+#include "shared/libc_common.h"
+#include "src/__support/math/ldexpf128.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::ldexpf128;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT128
+
+#endif // LLVM_LIBC_SHARED_MATH_LDEXPF128_H
diff --git a/libc/shared/math/ldexpf16.h b/libc/shared/math/ldexpf16.h
new file mode 100644
index 0000000000000..d4e39c449943e
--- /dev/null
+++ b/libc/shared/math/ldexpf16.h
@@ -0,0 +1,31 @@
+//===-- Shared ldexpf16 function --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_ldexpf16_H
+#define LLVM_LIBC_SHARED_MATH_ldexpf16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "shared/libc_common.h"
+#include "src/__support/math/ldexpf16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace shared {
+
+using math::ldexpf16;
+
+} // namespace shared
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SHARED_MATH_ldexpf16_H
diff --git a/libc/src/__support/FPUtil/double_double.h b/libc/src/__support/FPUtil/double_double.h
index c27885aadc028..8e54e845de493 100644
--- a/libc/src/__support/FPUtil/double_double.h
+++ b/libc/src/__support/FPUtil/double_double.h
@@ -151,8 +151,8 @@ LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) {
 }
 
 template <size_t SPLIT_B = 27>
-LIBC_INLINE DoubleDouble quick_mult(const DoubleDouble &a,
-                                    const DoubleDouble &b) {
+LIBC_INLINE constexpr DoubleDouble quick_mult(const DoubleDouble &a,
+                                              const DoubleDouble &b) {
   DoubleDouble r = exact_mult<double, SPLIT_B>(a.hi, b.hi);
   double t1 = multiply_add(a.hi, b.lo, r.lo);
   double t2 = multiply_add(a.lo, b.hi, t1);
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 0baf12a26a9d5..0bfc996c44fc8 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -82,3 +82,91 @@ add_header_library(
   DEPENDS
     libc.src.__support.FPUtil.manipulation_functions
 )
+
+add_header_library(
+  ldexpf128
+  HDRS
+    ldexpf128.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.manipulation_functions
+    libc.include.llvm-libc-types.float128
+)
+
+add_header_library(
+  ldexpf16
+  HDRS
+    ldexpf16.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.manipulation_functions
+    libc.include.llvm-libc-macros.float16_macros
+)
+
+add_header_library(
+  ldexpf
+  HDRS
+    ldexpf.h
+  DEPENDS
+    libc.src.__support.FPUtil.manipulation_functions
+)
+
+add_header_library(
+  exp_constants
+  HDRS
+    exp_constants.h
+  DEPENDS
+    libc.src.__support.FPUtil.triple_double
+)
+
+add_header_library(
+  exp_utils
+  HDRS
+    exp_utils.h
+  DEPENDS
+    libc.src.__support.CPP.optional
+    libc.src.__support.CPP.bit
+    libc.src.__support.FPUtil.fp_bits
+)
+
+add_header_library(
+  exp
+  HDRS
+    exp.h
+  DEPENDS
+    .exp_constants
+    .exp_utils
+    libc.src.__support.CPP.bit
+    libc.src.__support.CPP.optional
+    libc.src.__support.FPUtil.dyadic_float
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.nearest_integer
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.FPUtil.triple_double
+    libc.src.__support.integer_literals
+    libc.src.__support.macros.optimization
+)
+
+add_header_library(
+  exp10
+  HDRS
+    exp10.h
+  DEPENDS
+    .exp_constants
+    .exp_utils
+    libc.src.__support.CPP.bit
+    libc.src.__support.CPP.optional
+    libc.src.__support.FPUtil.dyadic_float
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.nearest_integer
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.FPUtil.triple_double
+    libc.src.__support.integer_literals
+    libc.src.__support.macros.optimization
+)
diff --git a/libc/src/__support/math/exp.h b/libc/src/__support/math/exp.h
new file mode 100644
index 0000000000000..5c43e753ea687
--- /dev/null
+++ b/libc/src/__support/math/exp.h
@@ -0,0 +1,448 @@
+//===-- Implementation header for exp ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP_H
+
+#include "exp_constants.h"
+#include "exp_utils.h"
+#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/optional.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/FPUtil/triple_double.h"
+#include "src/__support/common.h"
+#include "src/__support/integer_literals.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+using fputil::DoubleDouble;
+using fputil::TripleDouble;
+using Float128 = typename fputil::DyadicFloat<128>;
+
+using LIBC_NAMESPACE::operator""_u128;
+
+// log2(e)
+static constexpr double LOG2_E = 0x1.71547652b82fep+0;
+
+// Error bounds:
+// Errors when using double precision.
+static constexpr double ERR_D = 0x1.8p-63;
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+// Errors when using double-double precision.
+static constexpr double ERR_DD = 0x1.0p-99;
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+// -2^-12 * log(2)
+// > a = -2^-12 * log(2);
+// > b = round(a, 30, RN);
+// > c = round(a - b, 30, RN);
+// > d = round(a - b - c, D, RN);
+// Errors < 1.5 * 2^-133
+static constexpr double MLOG_2_EXP2_M12_HI = -0x1.62e42ffp-13;
+static constexpr double MLOG_2_EXP2_M12_MID = 0x1.718432a1b0e26p-47;
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+static constexpr double MLOG_2_EXP2_M12_MID_30 = 0x1.718432ap-47;
+static constexpr double MLOG_2_EXP2_M12_LO = 0x1.b0e2633fe0685p-79;
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+namespace {
+
+// Polynomial approximations with double precision:
+// Return expm1(dx) / x ~ 1 + dx / 2 + dx^2 / 6 + dx^3 / 24.
+// For |dx| < 2^-13 + 2^-30:
+//   | output - expm1(dx) / dx | < 2^-51.
+static constexpr double poly_approx_d(double dx) {
+  // dx^2
+  double dx2 = dx * dx;
+  // c0 = 1 + dx / 2
+  double c0 = fputil::multiply_add(dx, 0.5, 1.0);
+  // c1 = 1/6 + dx / 24
+  double c1 =
+      fputil::multiply_add(dx, 0x1.5555555555555p-5, 0x1.5555555555555p-3);
+  // p = dx^2 * c1 + c0 = 1 + dx / 2 + dx^2 / 6 + dx^3 / 24
+  double p = fputil::multiply_add(dx2, c1, c0);
+  return p;
+}
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+// Polynomial approximation with double-double precision:
+// Return exp(dx) ~ 1 + dx + dx^2 / 2 + ... + dx^6 / 720
+// For |dx| < 2^-13 + 2^-30:
+//   | output - exp(dx) | < 2^-101
+static constexpr DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
+  // Taylor polynomial.
+  constexpr DoubleDouble COEFFS[] = {
+      {0, 0x1p0},                                      // 1
+      {0, 0x1p0},                                      // 1
+      {0, 0x1p-1},                                     // 1/2
+      {0x1.5555555555555p-57, 0x1.5555555555555p-3},   // 1/6
+      {0x1.5555555555555p-59, 0x1.5555555555555p-5},   // 1/24
+      {0x1.1111111111111p-63, 0x1.1111111111111p-7},   // 1/120
+      {-0x1.f49f49f49f49fp-65, 0x1.6c16c16c16c17p-10}, // 1/720
+  };
+
+  DoubleDouble p = fputil::polyeval(dx, COEFFS[0], COEFFS[1], COEFFS[2],
+                                    COEFFS[3], COEFFS[4], COEFFS[5], COEFFS[6]);
+  return p;
+}
+
+// Polynomial approximation with 128-bit precision:
+// Return exp(dx) ~ 1 + dx + dx^2 / 2 + ... + dx^7 / 5040
+// For |dx| < 2^-13 + 2^-30:
+//   | output - exp(dx) | < 2^-126.
+static constexpr Float128 poly_approx_f128(const Float128 &dx) {
+  constexpr Float128 COEFFS_128[]{
+      {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
+      {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
+      {Sign::POS, -128, 0x80000000'00000000'00000000'00000000_u128}, // 0.5
+      {Sign::POS, -130, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/6
+      {Sign::POS, -132, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1/24
+      {Sign::POS, -134, 0x88888888'88888888'88888888'88888889_u128}, // 1/120
+      {Sign::POS, -137, 0xb60b60b6'0b60b60b'60b60b60'b60b60b6_u128}, // 1/720
+      {Sign::POS, -140, 0xd00d00d0'0d00d00d'00d00d00'd00d00d0_u128}, // 1/5040
+  };
+
+  Float128 p = fputil::polyeval(dx, COEFFS_128[0], COEFFS_128[1], COEFFS_128[2],
+                                COEFFS_128[3], COEFFS_128[4], COEFFS_128[5],
+                                COEFFS_128[6], COEFFS_128[7]);
+  return p;
+}
+
+// Compute exp(x) using 128-bit precision.
+// TODO(lntue): investigate triple-double precision implementation for this
+// step.
+static constexpr Float128 exp_f128(double x, double kd, int idx1, int idx2) {
+  // Recalculate dx:
+
+  double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
+  double t2 = kd * MLOG_2_EXP2_M12_MID_30;                     // exact
+  double t3 = kd * MLOG_2_EXP2_M12_LO;                         // Error < 2^-133
+
+  Float128 dx = fputil::quick_add(
+      Float128(t1), fputil::quick_add(Float128(t2), Float128(t3)));
+
+  // TODO: Skip recalculating exp_mid1 and exp_mid2.
+  Float128 exp_mid1 =
+      fputil::quick_add(Float128(EXP2_MID1[idx1].hi),
+                        fputil::quick_add(Float128(EXP2_MID1[idx1].mid),
+                                          Float128(EXP2_MID1[idx1].lo)));
+
+  Float128 exp_mid2 =
+      fputil::quick_add(Float128(EXP2_MID2[idx2].hi),
+                        fputil::quick_add(Float128(EXP2_MID2[idx2].mid),
+                                          Float128(EXP2_MID2[idx2].lo)));
+
+  Float128 exp_mid = fputil::quick_mul(exp_mid1, exp_mid2);
+
+  Float128 p = poly_approx_f128(dx);
+
+  Float128 r = fputil::quick_mul(exp_mid, p);
+
+  r.exponent += static_cast<int>(kd) >> 12;
+
+  return r;
+}
+
+// Compute exp(x) with double-double precision.
+static constexpr DoubleDouble exp_double_double(double x, double kd,
+                                                const DoubleDouble &exp_mid) {
+  // Recalculate dx:
+  //   dx = x - k * 2^-12 * log(2)
+  double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
+  double t2 = kd * MLOG_2_EXP2_M12_MID_30;                     // exact
+  double t3 = kd * MLOG_2_EXP2_M12_LO;                         // Error < 2^-130
+
+  DoubleDouble dx = fputil::exact_add(t1, t2);
+  dx.lo += t3;
+
+  // Degree-6 Taylor polynomial approximation in double-double precision.
+  // | p - exp(x) | < 2^-100.
+  DoubleDouble p = poly_approx_dd(dx);
+
+  // Error bounds: 2^-99.
+  DoubleDouble r = fputil::quick_mult(exp_mid, p);
+
+  return r;
+}
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+// Check for exceptional cases when
+// |x| <= 2^-53 or x < log(2^-1075) or x >= 0x1.6232bdd7abcd3p+9
+static constexpr double set_exceptional(double x) {
+  using FPBits = typename fputil::FPBits<double>;
+  FPBits xbits(x);
+
+  uint64_t x_u = xbits.uintval();
+  uint64_t x_abs = xbits.abs().uintval();
+
+  // |x| <= 2^-53
+  if (x_abs <= 0x3ca0'0000'0000'0000ULL) {
+    // exp(x) ~ 1 + x
+    return 1 + x;
+  }
+
+  // x <= log(2^-1075) || x >= 0x1.6232bdd7abcd3p+9 or inf/nan.
+
+  // x <= log(2^-1075) or -inf/nan
+  if (x_u >= 0xc087'4910'd52d'3052ULL) {
+    // exp(-Inf) = 0
+    if (xbits.is_inf())
+      return 0.0;
+
+    // exp(nan) = nan
+    if (xbits.is_nan())
+      return x;
+
+    if (fputil::quick_get_round() == FE_UPWARD)
+      return FPBits::min_subnormal().get_val();
+    fputil::set_errno_if_required(ERANGE);
+    fputil::raise_except_if_required(FE_UNDERFLOW);
+    return 0.0;
+  }
+
+  // x >= round(log(MAX_NORMAL), D, RU) = 0x1.62e42fefa39fp+9 or +inf/nan
+  // x is finite
+  if (x_u < 0x7ff0'0000'0000'0000ULL) {
+    int rounding = fputil::quick_get_round();
+    if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
+      return FPBits::max_normal().get_val();
+
+    fputil::set_errno_if_required(ERANGE);
+    fputil::raise_except_if_required(FE_OVERFLOW);
+  }
+  // x is +inf or nan
+  return x + FPBits::inf().get_val();
+}
+
+} // namespace
+
+namespace math {
+
+static constexpr double exp(double x) {
+  using FPBits = typename fputil::FPBits<double>;
+  FPBits xbits(x);
+
+  uint64_t x_u = xbits.uintval();
+
+  // Upper bound: max normal number = 2^1023 * (2 - 2^-52)
+  // > round(log (2^1023 ( 2 - 2^-52 )), D, RU) = 0x1.62e42fefa39fp+9
+  // > round(log (2^1023 ( 2 - 2^-52 )), D, RD) = 0x1.62e42fefa39efp+9
+  // > round(log (2^1023 ( 2 - 2^-52 )), D, RN) = 0x1.62e42fefa39efp+9
+  // > round(exp(0x1.62e42fefa39fp+9), D, RN) = infty
+
+  // Lower bound: min denormal number / 2 = 2^-1075
+  // > round(log(2^-1075), D, RN) = -0x1.74910d52d3052p9
+
+  // Another lower bound: min normal number = 2^-1022
+  // > round(log(2^-1022), D, RN) = -0x1.6232bdd7abcd2p9
+
+  // x < log(2^-1075) or x >= 0x1.6232bdd7abcd3p+9 or |x| < 2^-53.
+  if (LIBC_UNLIKELY(x_u >= 0xc0874910d52d3052 ||
+                    (x_u < 0xbca0000000000000 && x_u >= 0x40862e42fefa39f0) ||
+                    x_u < 0x3ca0000000000000)) {
+    return set_exceptional(x);
+  }
+
+  // Now log(2^-1075) <= x <= -2^-53 or 2^-53 <= x < log(2^1023 * (2 - 2^-52))
+
+  // Range reduction:
+  // Let x = log(2) * (hi + mid1 + mid2) + lo
+  // in which:
+  //   hi is an integer
+  //   mid1 * 2^6 is an integer
+  //   mid2 * 2^12 is an integer
+  // then:
+  //   exp(x) = 2^hi * 2^(mid1) * 2^(mid2) * exp(lo).
+  // With this formula:
+  //   - multiplying by 2^hi is exact and cheap, simply by adding the exponent
+  //     field.
+  //   - 2^(mid1) and 2^(mid2) are stored in 2 x 64-element tables.
+  //   - exp(lo) ~ 1 + lo + a0 * lo^2 + ...
+  //
+  // They can be defined by:
+  //   hi + mid1 + mid2 = 2^(-12) * round(2^12 * log_2(e) * x)
+  // If we store L2E = round(log2(e), D, RN), then:
+  //   log2(e) - L2E ~ 1.5 * 2^(-56)
+  // So the errors when computing in double precision is:
+  //   | x * 2^12 * log_2(e) - D(x * 2^12 * L2E) | <=
+  //  <= | x * 2^12 * log_2(e) - x * 2^12 * L2E | +
+  //     + | x * 2^12 * L2E - D(x * 2^12 * L2E) |
+  //  <= 2^12 * ( |x| * 1.5 * 2^-56 + eps(x))  for RN
+  //     2^12 * ( |x| * 1.5 * 2^-56 + 2*eps(x)) for other rounding modes.
+  // So if:
+  //   hi + mid1 + mid2 = 2^(-12) * round(x * 2^12 * L2E) is computed entirely
+  // in double precision, the reduced argument:
+  //   lo = x - log(2) * (hi + mid1 + mid2) is bounded by:
+  //   |lo| <= 2^-13 + (|x| * 1.5 * 2^-56 + 2*eps(x))
+  //         < 2^-13 + (1.5 * 2^9 * 1.5 * 2^-56 + 2*2^(9 - 52))
+  //         < 2^-13 + 2^-41
+  //
+
+  // The following trick computes the round(x * L2E) more efficiently
+  // than using the rounding instructions, with the tradeoff for less accuracy,
+  // and hence a slightly larger range for the reduced argument `lo`.
+  //
+  // To be precise, since |x| < |log(2^-1075)| < 1.5 * 2^9,
+  //   |x * 2^12 * L2E| < 1.5 * 2^9 * 1.5 < 2^23,
+  // So we can fit the rounded result round(x * 2^12 * L2E) in int32_t.
+  // Thus, the goal is to be able to use an additional addition and fixed width
+  // shift to get an int32_t representing round(x * 2^12 * L2E).
+  //
+  // Assuming int32_t using 2-complement representation, since the mantissa part
+  // of a double precision is unsigned with the leading bit hidden, if we add an
+  // extra constant C = 2^e1 + 2^e2 with...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/148400


More information about the llvm-commits mailing list