[libc] [llvm] [libc][math] Refactor exp10f implementation to header-only in src/__support/math folder. (PR #148405)
Muhammad Bassiouni via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 16 12:38:33 PDT 2025
https://github.com/bassiounix updated https://github.com/llvm/llvm-project/pull/148405
>From b3a269fcadb6710e00ec6ea6576ec1b4e595878d Mon Sep 17 00:00:00 2001
From: bassiounix <muhammad.m.bassiouni at gmail.com>
Date: Fri, 11 Jul 2025 03:12:38 +0300
Subject: [PATCH 1/9] [libc][math] Refactor exp implementation to header-only
in src/__support/math folder.
---
libc/shared/math.h | 1 +
libc/src/math/generic/common_constants.h | 2 ++
2 files changed, 3 insertions(+)
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 3012cbb938816..37505191b915b 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -20,5 +20,6 @@
#include "math/ldexpf.h"
#include "math/ldexpf128.h"
#include "math/ldexpf16.h"
+#include "math/exp.h"
#endif // LLVM_LIBC_SHARED_MATH_H
diff --git a/libc/src/math/generic/common_constants.h b/libc/src/math/generic/common_constants.h
index 291816a7889ad..a1d6de3c87240 100644
--- a/libc/src/math/generic/common_constants.h
+++ b/libc/src/math/generic/common_constants.h
@@ -13,6 +13,8 @@
#include "src/__support/macros/config.h"
#include "src/__support/math/exp_constants.h"
#include "src/__support/number_pair.h"
+#include "src/__support/math/exp_constants.h"
+
namespace LIBC_NAMESPACE_DECL {
>From da7ed77f45140c0b7d70b47425ce401edc240bca Mon Sep 17 00:00:00 2001
From: bassiounix <muhammad.m.bassiouni at gmail.com>
Date: Fri, 11 Jul 2025 03:19:04 +0300
Subject: [PATCH 2/9] fix style
---
libc/shared/math.h | 1 -
libc/src/math/generic/common_constants.h | 2 +-
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 37505191b915b..3012cbb938816 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -20,6 +20,5 @@
#include "math/ldexpf.h"
#include "math/ldexpf128.h"
#include "math/ldexpf16.h"
-#include "math/exp.h"
#endif // LLVM_LIBC_SHARED_MATH_H
diff --git a/libc/src/math/generic/common_constants.h b/libc/src/math/generic/common_constants.h
index a1d6de3c87240..80eee06b2d4d9 100644
--- a/libc/src/math/generic/common_constants.h
+++ b/libc/src/math/generic/common_constants.h
@@ -14,7 +14,7 @@
#include "src/__support/math/exp_constants.h"
#include "src/__support/number_pair.h"
#include "src/__support/math/exp_constants.h"
-
+#include "src/__support/number_pair.h"
namespace LIBC_NAMESPACE_DECL {
>From a39b11b3c565fcf81f99595603b60e77a3e107fc Mon Sep 17 00:00:00 2001
From: bassiounix <muhammad.m.bassiouni at gmail.com>
Date: Sun, 13 Jul 2025 00:40:16 +0300
Subject: [PATCH 3/9] [libc][math] Refactor exp10 implementation to header-only
in src/__support/math folder.
---
libc/shared/math.h | 1 +
libc/shared/math/exp10.h | 23 +
libc/src/__support/FPUtil/double_double.h | 4 +-
libc/src/__support/math/CMakeLists.txt | 21 +
libc/src/__support/math/exp10.h | 505 ++++++++++++++++++
libc/src/math/generic/CMakeLists.txt | 15 +-
libc/src/math/generic/common_constants.h | 2 -
libc/src/math/generic/exp10.cpp | 485 +----------------
.../llvm-project-overlay/libc/BUILD.bazel | 31 +-
9 files changed, 575 insertions(+), 512 deletions(-)
create mode 100644 libc/shared/math/exp10.h
create mode 100644 libc/src/__support/math/exp10.h
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 3012cbb938816..b37aa46820523 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -12,6 +12,7 @@
#include "libc_common.h"
#include "math/exp.h"
+#include "math/exp10.h"
#include "math/expf.h"
#include "math/expf16.h"
#include "math/frexpf.h"
diff --git a/libc/shared/math/exp10.h b/libc/shared/math/exp10.h
new file mode 100644
index 0000000000000..3d36d9103705f
--- /dev/null
+++ b/libc/shared/math/exp10.h
@@ -0,0 +1,23 @@
+//===-- Shared exp10 function -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_EXP10_H
+#define LLVM_LIBC_SHARED_MATH_EXP10_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/exp10.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::exp10;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_EXP10_H
diff --git a/libc/src/__support/FPUtil/double_double.h b/libc/src/__support/FPUtil/double_double.h
index c27885aadc028..8e54e845de493 100644
--- a/libc/src/__support/FPUtil/double_double.h
+++ b/libc/src/__support/FPUtil/double_double.h
@@ -151,8 +151,8 @@ LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) {
}
template <size_t SPLIT_B = 27>
-LIBC_INLINE DoubleDouble quick_mult(const DoubleDouble &a,
- const DoubleDouble &b) {
+LIBC_INLINE constexpr DoubleDouble quick_mult(const DoubleDouble &a,
+ const DoubleDouble &b) {
DoubleDouble r = exact_mult<double, SPLIT_B>(a.hi, b.hi);
double t1 = multiply_add(a.hi, b.lo, r.lo);
double t2 = multiply_add(a.lo, b.hi, t1);
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index f7ef9e7694fe6..0bfc996c44fc8 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -149,3 +149,24 @@ add_header_library(
libc.src.__support.integer_literals
libc.src.__support.macros.optimization
)
+
+add_header_library(
+ exp10
+ HDRS
+ exp10.h
+ DEPENDS
+ .exp_constants
+ .exp_utils
+ libc.src.__support.CPP.bit
+ libc.src.__support.CPP.optional
+ libc.src.__support.FPUtil.dyadic_float
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.nearest_integer
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.FPUtil.rounding_mode
+ libc.src.__support.FPUtil.triple_double
+ libc.src.__support.integer_literals
+ libc.src.__support.macros.optimization
+)
diff --git a/libc/src/__support/math/exp10.h b/libc/src/__support/math/exp10.h
new file mode 100644
index 0000000000000..da94281c0c745
--- /dev/null
+++ b/libc/src/__support/math/exp10.h
@@ -0,0 +1,505 @@
+//===-- Implementation header for exp10 ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10_H
+
+#include "exp_constants.h" // Lookup tables EXP2_MID1 and EXP_M2.
+#include "exp_utils.h" // ziv_test_denorm.
+#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/optional.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/FPUtil/triple_double.h"
+#include "src/__support/common.h"
+#include "src/__support/integer_literals.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+using fputil::DoubleDouble;
+using fputil::TripleDouble;
+using Float128 = typename fputil::DyadicFloat<128>;
+
+using LIBC_NAMESPACE::operator""_u128;
+
+// log2(10)
+constexpr double LOG2_10 = 0x1.a934f0979a371p+1;
+
+// -2^-12 * log10(2)
+// > a = -2^-12 * log10(2);
+// > b = round(a, 32, RN);
+// > c = round(a - b, 32, RN);
+// > d = round(a - b - c, D, RN);
+// Errors < 1.5 * 2^-144
+constexpr double MLOG10_2_EXP2_M12_HI = -0x1.3441350ap-14;
+constexpr double MLOG10_2_EXP2_M12_MID = 0x1.0c0219dc1da99p-51;
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+constexpr double MLOG10_2_EXP2_M12_MID_32 = 0x1.0c0219dcp-51;
+constexpr double MLOG10_2_EXP2_M12_LO = 0x1.da994fd20dba2p-87;
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+// Error bounds:
+// Errors when using double precision.
+constexpr double ERR_D = 0x1.8p-63;
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+// Errors when using double-double precision.
+constexpr double ERR_DD = 0x1.8p-99;
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+namespace {
+
+// Polynomial approximations with double precision. Generated by Sollya with:
+// > P = fpminimax((10^x - 1)/x, 3, [|D...|], [-2^-14, 2^-14]);
+// > P;
+// Error bounds:
+// | output - (10^dx - 1) / dx | < 2^-52.
+LIBC_INLINE static constexpr double poly_approx_d(double dx) {
+ // dx^2
+ double dx2 = dx * dx;
+ double c0 =
+ fputil::multiply_add(dx, 0x1.53524c73cea6ap+1, 0x1.26bb1bbb55516p+1);
+ double c1 =
+ fputil::multiply_add(dx, 0x1.2bd75cc6afc65p+0, 0x1.0470587aa264cp+1);
+ double p = fputil::multiply_add(dx2, c1, c0);
+ return p;
+}
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+// Polynomial approximation with double-double precision. Generated by Solya
+// with:
+// > P = fpminimax((10^x - 1)/x, 5, [|DD...|], [-2^-14, 2^-14]);
+// Error bounds:
+// | output - 10^(dx) | < 2^-101
+static constexpr DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
+ // Taylor polynomial.
+ constexpr DoubleDouble COEFFS[] = {
+ {0, 0x1p0},
+ {-0x1.f48ad494e927bp-53, 0x1.26bb1bbb55516p1},
+ {-0x1.e2bfab3191cd2p-53, 0x1.53524c73cea69p1},
+ {0x1.80fb65ec3b503p-53, 0x1.0470591de2ca4p1},
+ {0x1.338fc05e21e55p-54, 0x1.2bd7609fd98c4p0},
+ {0x1.d4ea116818fbp-56, 0x1.1429ffd519865p-1},
+ {-0x1.872a8ff352077p-57, 0x1.a7ed70847c8b3p-3},
+
+ };
+
+ DoubleDouble p = fputil::polyeval(dx, COEFFS[0], COEFFS[1], COEFFS[2],
+ COEFFS[3], COEFFS[4], COEFFS[5], COEFFS[6]);
+ return p;
+}
+
+// Polynomial approximation with 128-bit precision:
+// Return exp(dx) ~ 1 + a0 * dx + a1 * dx^2 + ... + a6 * dx^7
+// For |dx| < 2^-14:
+// | output - 10^dx | < 1.5 * 2^-124.
+static constexpr Float128 poly_approx_f128(const Float128 &dx) {
+ constexpr Float128 COEFFS_128[]{
+ {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
+ {Sign::POS, -126, 0x935d8ddd'aaa8ac16'ea56d62b'82d30a2d_u128},
+ {Sign::POS, -126, 0xa9a92639'e753443a'80a99ce7'5f4d5bdb_u128},
+ {Sign::POS, -126, 0x82382c8e'f1652304'6a4f9d7d'bf6c9635_u128},
+ {Sign::POS, -124, 0x12bd7609'fd98c44c'34578701'9216c7af_u128},
+ {Sign::POS, -127, 0x450a7ff4'7535d889'cc41ed7e'0d27aee5_u128},
+ {Sign::POS, -130, 0xd3f6b844'702d636b'8326bb91'a6e7601d_u128},
+ {Sign::POS, -130, 0x45b937f0'd05bb1cd'fa7b46df'314112a9_u128},
+ };
+
+ Float128 p = fputil::polyeval(dx, COEFFS_128[0], COEFFS_128[1], COEFFS_128[2],
+ COEFFS_128[3], COEFFS_128[4], COEFFS_128[5],
+ COEFFS_128[6], COEFFS_128[7]);
+ return p;
+}
+
+// Compute 10^(x) using 128-bit precision.
+// TODO(lntue): investigate triple-double precision implementation for this
+// step.
+static constexpr Float128 exp10_f128(double x, double kd, int idx1, int idx2) {
+ double t1 = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact
+ double t2 = kd * MLOG10_2_EXP2_M12_MID_32; // exact
+ double t3 = kd * MLOG10_2_EXP2_M12_LO; // Error < 2^-144
+
+ Float128 dx = fputil::quick_add(
+ Float128(t1), fputil::quick_add(Float128(t2), Float128(t3)));
+
+ // TODO: Skip recalculating exp_mid1 and exp_mid2.
+ Float128 exp_mid1 =
+ fputil::quick_add(Float128(EXP2_MID1[idx1].hi),
+ fputil::quick_add(Float128(EXP2_MID1[idx1].mid),
+ Float128(EXP2_MID1[idx1].lo)));
+
+ Float128 exp_mid2 =
+ fputil::quick_add(Float128(EXP2_MID2[idx2].hi),
+ fputil::quick_add(Float128(EXP2_MID2[idx2].mid),
+ Float128(EXP2_MID2[idx2].lo)));
+
+ Float128 exp_mid = fputil::quick_mul(exp_mid1, exp_mid2);
+
+ Float128 p = poly_approx_f128(dx);
+
+ Float128 r = fputil::quick_mul(exp_mid, p);
+
+ r.exponent += static_cast<int>(kd) >> 12;
+
+ return r;
+}
+
+// Compute 10^x with double-double precision.
+static constexpr DoubleDouble exp10_double_double(double x, double kd,
+ const DoubleDouble &exp_mid) {
+ // Recalculate dx:
+ // dx = x - k * 2^-12 * log10(2)
+ double t1 = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact
+ double t2 = kd * MLOG10_2_EXP2_M12_MID_32; // exact
+ double t3 = kd * MLOG10_2_EXP2_M12_LO; // Error < 2^-140
+
+ DoubleDouble dx = fputil::exact_add(t1, t2);
+ dx.lo += t3;
+
+ // Degree-6 polynomial approximation in double-double precision.
+ // | p - 10^x | < 2^-103.
+ DoubleDouble p = poly_approx_dd(dx);
+
+ // Error bounds: 2^-102.
+ DoubleDouble r = fputil::quick_mult(exp_mid, p);
+
+ return r;
+}
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+// When output is denormal.
+static constexpr double exp10_denorm(double x) {
+ // Range reduction.
+ double tmp = fputil::multiply_add(x, LOG2_10, 0x1.8000'0000'4p21);
+ int k = static_cast<int>(cpp::bit_cast<uint64_t>(tmp) >> 19);
+ double kd = static_cast<double>(k);
+
+ uint32_t idx1 = (k >> 6) & 0x3f;
+ uint32_t idx2 = k & 0x3f;
+
+ int hi = k >> 12;
+
+ DoubleDouble exp_mid1{EXP2_MID1[idx1].mid, EXP2_MID1[idx1].hi};
+ DoubleDouble exp_mid2{EXP2_MID2[idx2].mid, EXP2_MID2[idx2].hi};
+ DoubleDouble exp_mid = fputil::quick_mult(exp_mid1, exp_mid2);
+
+ // |dx| < 1.5 * 2^-15 + 2^-31 < 2^-14
+ double lo_h = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact
+ double dx = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_MID, lo_h);
+
+ double mid_lo = dx * exp_mid.hi;
+
+ // Approximate (10^dx - 1)/dx ~ 1 + a0*dx + a1*dx^2 + a2*dx^3 + a3*dx^4.
+ double p = poly_approx_d(dx);
+
+ double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, ERR_D)
+ .value();
+#else
+ if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, ERR_D);
+ LIBC_LIKELY(r.has_value()))
+ return r.value();
+
+ // Use double-double
+ DoubleDouble r_dd = exp10_double_double(x, kd, exp_mid);
+
+ if (auto r = ziv_test_denorm(hi, r_dd.hi, r_dd.lo, ERR_DD);
+ LIBC_LIKELY(r.has_value()))
+ return r.value();
+
+ // Use 128-bit precision
+ Float128 r_f128 = exp10_f128(x, kd, idx1, idx2);
+
+ return static_cast<double>(r_f128);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+}
+
+// Check for exceptional cases when:
+// * log10(1 - 2^-54) < x < log10(1 + 2^-53)
+// * x >= log10(2^1024)
+// * x <= log10(2^-1022)
+// * x is inf or nan
+static constexpr double set_exceptional(double x) {
+ using FPBits = typename fputil::FPBits<double>;
+ FPBits xbits(x);
+
+ uint64_t x_u = xbits.uintval();
+ uint64_t x_abs = xbits.abs().uintval();
+
+ // |x| < log10(1 + 2^-53)
+ if (x_abs <= 0x3c8bcb7b1526e50e) {
+ // 10^(x) ~ 1 + x/2
+ return fputil::multiply_add(x, 0.5, 1.0);
+ }
+
+ // x <= log10(2^-1022) || x >= log10(2^1024) or inf/nan.
+ if (x_u >= 0xc0733a7146f72a42) {
+ // x <= log10(2^-1075) or -inf/nan
+ if (x_u > 0xc07439b746e36b52) {
+ // exp(-Inf) = 0
+ if (xbits.is_inf())
+ return 0.0;
+
+ // exp(nan) = nan
+ if (xbits.is_nan())
+ return x;
+
+ if (fputil::quick_get_round() == FE_UPWARD)
+ return FPBits::min_subnormal().get_val();
+ fputil::set_errno_if_required(ERANGE);
+ fputil::raise_except_if_required(FE_UNDERFLOW);
+ return 0.0;
+ }
+
+ return exp10_denorm(x);
+ }
+
+ // x >= log10(2^1024) or +inf/nan
+ // x is finite
+ if (x_u < 0x7ff0'0000'0000'0000ULL) {
+ int rounding = fputil::quick_get_round();
+ if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
+ return FPBits::max_normal().get_val();
+
+ fputil::set_errno_if_required(ERANGE);
+ fputil::raise_except_if_required(FE_OVERFLOW);
+ }
+ // x is +inf or nan
+ return x + FPBits::inf().get_val();
+}
+
+} // namespace
+
+namespace math {
+
+static constexpr double exp10(double x) {
+ using FPBits = typename fputil::FPBits<double>;
+ FPBits xbits(x);
+
+ uint64_t x_u = xbits.uintval();
+
+ // x <= log10(2^-1022) or x >= log10(2^1024) or
+ // log10(1 - 2^-54) < x < log10(1 + 2^-53).
+ if (LIBC_UNLIKELY(x_u >= 0xc0733a7146f72a42 ||
+ (x_u <= 0xbc7bcb7b1526e50e && x_u >= 0x40734413509f79ff) ||
+ x_u < 0x3c8bcb7b1526e50e)) {
+ return set_exceptional(x);
+ }
+
+ // Now log10(2^-1075) < x <= log10(1 - 2^-54) or
+ // log10(1 + 2^-53) < x < log10(2^1024)
+
+ // Range reduction:
+ // Let x = log10(2) * (hi + mid1 + mid2) + lo
+ // in which:
+ // hi is an integer
+ // mid1 * 2^6 is an integer
+ // mid2 * 2^12 is an integer
+ // then:
+ // 10^(x) = 2^hi * 2^(mid1) * 2^(mid2) * 10^(lo).
+ // With this formula:
+ // - multiplying by 2^hi is exact and cheap, simply by adding the exponent
+ // field.
+ // - 2^(mid1) and 2^(mid2) are stored in 2 x 64-element tables.
+ // - 10^(lo) ~ 1 + a0*lo + a1 * lo^2 + ...
+ //
+ // We compute (hi + mid1 + mid2) together by perform the rounding on
+ // x * log2(10) * 2^12.
+ // Since |x| < |log10(2^-1075)| < 2^9,
+ // |x * 2^12| < 2^9 * 2^12 < 2^21,
+ // So we can fit the rounded result round(x * 2^12) in int32_t.
+ // Thus, the goal is to be able to use an additional addition and fixed width
+ // shift to get an int32_t representing round(x * 2^12).
+ //
+ // Assuming int32_t using 2-complement representation, since the mantissa part
+ // of a double precision is unsigned with the leading bit hidden, if we add an
+ // extra constant C = 2^e1 + 2^e2 with e1 > e2 >= 2^23 to the product, the
+ // part that are < 2^e2 in resulted mantissa of (x*2^12*L2E + C) can be
+ // considered as a proper 2-complement representations of x*2^12.
+ //
+ // One small problem with this approach is that the sum (x*2^12 + C) in
+ // double precision is rounded to the least significant bit of the dorminant
+ // factor C. In order to minimize the rounding errors from this addition, we
+ // want to minimize e1. Another constraint that we want is that after
+ // shifting the mantissa so that the least significant bit of int32_t
+ // corresponds to the unit bit of (x*2^12*L2E), the sign is correct without
+ // any adjustment. So combining these 2 requirements, we can choose
+ // C = 2^33 + 2^32, so that the sign bit corresponds to 2^31 bit, and hence
+ // after right shifting the mantissa, the resulting int32_t has correct sign.
+ // With this choice of C, the number of mantissa bits we need to shift to the
+ // right is: 52 - 33 = 19.
+ //
+ // Moreover, since the integer right shifts are equivalent to rounding down,
+ // we can add an extra 0.5 so that it will become round-to-nearest, tie-to-
+ // +infinity. So in particular, we can compute:
+ // hmm = x * 2^12 + C,
+ // where C = 2^33 + 2^32 + 2^-1, then if
+ // k = int32_t(lower 51 bits of double(x * 2^12 + C) >> 19),
+ // the reduced argument:
+ // lo = x - log10(2) * 2^-12 * k is bounded by:
+ // |lo| = |x - log10(2) * 2^-12 * k|
+ // = log10(2) * 2^-12 * | x * log2(10) * 2^12 - k |
+ // <= log10(2) * 2^-12 * (2^-1 + 2^-19)
+ // < 1.5 * 2^-2 * (2^-13 + 2^-31)
+ // = 1.5 * (2^-15 * 2^-31)
+ //
+ // Finally, notice that k only uses the mantissa of x * 2^12, so the
+ // exponent 2^12 is not needed. So we can simply define
+ // C = 2^(33 - 12) + 2^(32 - 12) + 2^(-13 - 12), and
+ // k = int32_t(lower 51 bits of double(x + C) >> 19).
+
+ // Rounding errors <= 2^-31.
+ double tmp = fputil::multiply_add(x, LOG2_10, 0x1.8000'0000'4p21);
+ int k = static_cast<int>(cpp::bit_cast<uint64_t>(tmp) >> 19);
+ double kd = static_cast<double>(k);
+
+ uint32_t idx1 = (k >> 6) & 0x3f;
+ uint32_t idx2 = k & 0x3f;
+
+ int hi = k >> 12;
+
+ DoubleDouble exp_mid1{EXP2_MID1[idx1].mid, EXP2_MID1[idx1].hi};
+ DoubleDouble exp_mid2{EXP2_MID2[idx2].mid, EXP2_MID2[idx2].hi};
+ DoubleDouble exp_mid = fputil::quick_mult(exp_mid1, exp_mid2);
+
+ // |dx| < 1.5 * 2^-15 + 2^-31 < 2^-14
+ double lo_h = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact
+ double dx = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_MID, lo_h);
+
+ // We use the degree-4 polynomial to approximate 10^(lo):
+ // 10^(lo) ~ 1 + a0 * lo + a1 * lo^2 + a2 * lo^3 + a3 * lo^4
+ // = 1 + lo * P(lo)
+ // So that the errors are bounded by:
+ // |P(lo) - (10^lo - 1)/lo| < |lo|^4 / 64 < 2^(-13 * 4) / 64 = 2^-58
+ // Let P_ be an evaluation of P where all intermediate computations are in
+ // double precision. Using either Horner's or Estrin's schemes, the evaluated
+ // errors can be bounded by:
+ // |P_(lo) - P(lo)| < 2^-51
+ // => |lo * P_(lo) - (2^lo - 1) | < 2^-65
+ // => 2^(mid1 + mid2) * |lo * P_(lo) - expm1(lo)| < 2^-64.
+ // Since we approximate
+ // 2^(mid1 + mid2) ~ exp_mid.hi + exp_mid.lo,
+ // We use the expression:
+ // (exp_mid.hi + exp_mid.lo) * (1 + dx * P_(dx)) ~
+ // ~ exp_mid.hi + (exp_mid.hi * dx * P_(dx) + exp_mid.lo)
+ // with errors bounded by 2^-64.
+
+ double mid_lo = dx * exp_mid.hi;
+
+ // Approximate (10^dx - 1)/dx ~ 1 + a0*dx + a1*dx^2 + a2*dx^3 + a3*dx^4.
+ double p = poly_approx_d(dx);
+
+ double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
+
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
+ double r =
+ cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(exp_mid.hi + lo));
+ return r;
+#else
+ double upper = exp_mid.hi + (lo + ERR_D);
+ double lower = exp_mid.hi + (lo - ERR_D);
+
+ if (LIBC_LIKELY(upper == lower)) {
+ // To multiply by 2^hi, a fast way is to simply add hi to the exponent
+ // field.
+ int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
+ double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper));
+ return r;
+ }
+
+ // Exact outputs when x = 1, 2, ..., 22 + hard to round with x = 23.
+ // Quick check mask: 0x800f'ffffU = ~(bits of 1.0 | ... | bits of 23.0)
+ if (LIBC_UNLIKELY((x_u & 0x8000'ffff'ffff'ffffULL) == 0ULL)) {
+ switch (x_u) {
+ case 0x3ff0000000000000: // x = 1.0
+ return 10.0;
+ case 0x4000000000000000: // x = 2.0
+ return 100.0;
+ case 0x4008000000000000: // x = 3.0
+ return 1'000.0;
+ case 0x4010000000000000: // x = 4.0
+ return 10'000.0;
+ case 0x4014000000000000: // x = 5.0
+ return 100'000.0;
+ case 0x4018000000000000: // x = 6.0
+ return 1'000'000.0;
+ case 0x401c000000000000: // x = 7.0
+ return 10'000'000.0;
+ case 0x4020000000000000: // x = 8.0
+ return 100'000'000.0;
+ case 0x4022000000000000: // x = 9.0
+ return 1'000'000'000.0;
+ case 0x4024000000000000: // x = 10.0
+ return 10'000'000'000.0;
+ case 0x4026000000000000: // x = 11.0
+ return 100'000'000'000.0;
+ case 0x4028000000000000: // x = 12.0
+ return 1'000'000'000'000.0;
+ case 0x402a000000000000: // x = 13.0
+ return 10'000'000'000'000.0;
+ case 0x402c000000000000: // x = 14.0
+ return 100'000'000'000'000.0;
+ case 0x402e000000000000: // x = 15.0
+ return 1'000'000'000'000'000.0;
+ case 0x4030000000000000: // x = 16.0
+ return 10'000'000'000'000'000.0;
+ case 0x4031000000000000: // x = 17.0
+ return 100'000'000'000'000'000.0;
+ case 0x4032000000000000: // x = 18.0
+ return 1'000'000'000'000'000'000.0;
+ case 0x4033000000000000: // x = 19.0
+ return 10'000'000'000'000'000'000.0;
+ case 0x4034000000000000: // x = 20.0
+ return 100'000'000'000'000'000'000.0;
+ case 0x4035000000000000: // x = 21.0
+ return 1'000'000'000'000'000'000'000.0;
+ case 0x4036000000000000: // x = 22.0
+ return 10'000'000'000'000'000'000'000.0;
+ case 0x4037000000000000: // x = 23.0
+ return 0x1.52d02c7e14af6p76 + x;
+ }
+ }
+
+ // Use double-double
+ DoubleDouble r_dd = exp10_double_double(x, kd, exp_mid);
+
+ double upper_dd = r_dd.hi + (r_dd.lo + ERR_DD);
+ double lower_dd = r_dd.hi + (r_dd.lo - ERR_DD);
+
+ if (LIBC_LIKELY(upper_dd == lower_dd)) {
+ // To multiply by 2^hi, a fast way is to simply add hi to the exponent
+ // field.
+ int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
+ double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper_dd));
+ return r;
+ }
+
+ // Use 128-bit precision
+ Float128 r_f128 = exp10_f128(x, kd, idx1, idx2);
+
+ return static_cast<double>(r_f128);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index b59beacd94143..352c2ad4ab22a 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1457,20 +1457,7 @@ add_entrypoint_object(
HDRS
../exp10.h
DEPENDS
- .common_constants
- .explogxf
- libc.src.__support.CPP.bit
- libc.src.__support.CPP.optional
- libc.src.__support.FPUtil.dyadic_float
- libc.src.__support.FPUtil.fenv_impl
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.nearest_integer
- libc.src.__support.FPUtil.polyeval
- libc.src.__support.FPUtil.rounding_mode
- libc.src.__support.FPUtil.triple_double
- libc.src.__support.integer_literals
- libc.src.__support.macros.optimization
+ libc.src.__support.math.exp10
libc.src.errno.errno
)
diff --git a/libc/src/math/generic/common_constants.h b/libc/src/math/generic/common_constants.h
index 80eee06b2d4d9..291816a7889ad 100644
--- a/libc/src/math/generic/common_constants.h
+++ b/libc/src/math/generic/common_constants.h
@@ -13,8 +13,6 @@
#include "src/__support/macros/config.h"
#include "src/__support/math/exp_constants.h"
#include "src/__support/number_pair.h"
-#include "src/__support/math/exp_constants.h"
-#include "src/__support/number_pair.h"
namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/math/generic/exp10.cpp b/libc/src/math/generic/exp10.cpp
index c464979b092c3..5c36d28c166ae 100644
--- a/libc/src/math/generic/exp10.cpp
+++ b/libc/src/math/generic/exp10.cpp
@@ -7,491 +7,10 @@
//===----------------------------------------------------------------------===//
#include "src/math/exp10.h"
-#include "common_constants.h" // Lookup tables EXP2_MID1 and EXP_M2.
-#include "explogxf.h" // ziv_test_denorm.
-#include "src/__support/CPP/bit.h"
-#include "src/__support/CPP/optional.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/double_double.h"
-#include "src/__support/FPUtil/dyadic_float.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/nearest_integer.h"
-#include "src/__support/FPUtil/rounding_mode.h"
-#include "src/__support/FPUtil/triple_double.h"
-#include "src/__support/common.h"
-#include "src/__support/integer_literals.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/math/exp10.h"
namespace LIBC_NAMESPACE_DECL {
-using fputil::DoubleDouble;
-using fputil::TripleDouble;
-using Float128 = typename fputil::DyadicFloat<128>;
-
-using LIBC_NAMESPACE::operator""_u128;
-
-// log2(10)
-constexpr double LOG2_10 = 0x1.a934f0979a371p+1;
-
-// -2^-12 * log10(2)
-// > a = -2^-12 * log10(2);
-// > b = round(a, 32, RN);
-// > c = round(a - b, 32, RN);
-// > d = round(a - b - c, D, RN);
-// Errors < 1.5 * 2^-144
-constexpr double MLOG10_2_EXP2_M12_HI = -0x1.3441350ap-14;
-constexpr double MLOG10_2_EXP2_M12_MID = 0x1.0c0219dc1da99p-51;
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-constexpr double MLOG10_2_EXP2_M12_MID_32 = 0x1.0c0219dcp-51;
-constexpr double MLOG10_2_EXP2_M12_LO = 0x1.da994fd20dba2p-87;
-#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-// Error bounds:
-// Errors when using double precision.
-constexpr double ERR_D = 0x1.8p-63;
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-// Errors when using double-double precision.
-constexpr double ERR_DD = 0x1.8p-99;
-#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-namespace {
-
-// Polynomial approximations with double precision. Generated by Sollya with:
-// > P = fpminimax((10^x - 1)/x, 3, [|D...|], [-2^-14, 2^-14]);
-// > P;
-// Error bounds:
-// | output - (10^dx - 1) / dx | < 2^-52.
-LIBC_INLINE double poly_approx_d(double dx) {
- // dx^2
- double dx2 = dx * dx;
- double c0 =
- fputil::multiply_add(dx, 0x1.53524c73cea6ap+1, 0x1.26bb1bbb55516p+1);
- double c1 =
- fputil::multiply_add(dx, 0x1.2bd75cc6afc65p+0, 0x1.0470587aa264cp+1);
- double p = fputil::multiply_add(dx2, c1, c0);
- return p;
-}
-
-#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-// Polynomial approximation with double-double precision. Generated by Solya
-// with:
-// > P = fpminimax((10^x - 1)/x, 5, [|DD...|], [-2^-14, 2^-14]);
-// Error bounds:
-// | output - 10^(dx) | < 2^-101
-DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
- // Taylor polynomial.
- constexpr DoubleDouble COEFFS[] = {
- {0, 0x1p0},
- {-0x1.f48ad494e927bp-53, 0x1.26bb1bbb55516p1},
- {-0x1.e2bfab3191cd2p-53, 0x1.53524c73cea69p1},
- {0x1.80fb65ec3b503p-53, 0x1.0470591de2ca4p1},
- {0x1.338fc05e21e55p-54, 0x1.2bd7609fd98c4p0},
- {0x1.d4ea116818fbp-56, 0x1.1429ffd519865p-1},
- {-0x1.872a8ff352077p-57, 0x1.a7ed70847c8b3p-3},
-
- };
-
- DoubleDouble p = fputil::polyeval(dx, COEFFS[0], COEFFS[1], COEFFS[2],
- COEFFS[3], COEFFS[4], COEFFS[5], COEFFS[6]);
- return p;
-}
-
-// Polynomial approximation with 128-bit precision:
-// Return exp(dx) ~ 1 + a0 * dx + a1 * dx^2 + ... + a6 * dx^7
-// For |dx| < 2^-14:
-// | output - 10^dx | < 1.5 * 2^-124.
-Float128 poly_approx_f128(const Float128 &dx) {
- constexpr Float128 COEFFS_128[]{
- {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
- {Sign::POS, -126, 0x935d8ddd'aaa8ac16'ea56d62b'82d30a2d_u128},
- {Sign::POS, -126, 0xa9a92639'e753443a'80a99ce7'5f4d5bdb_u128},
- {Sign::POS, -126, 0x82382c8e'f1652304'6a4f9d7d'bf6c9635_u128},
- {Sign::POS, -124, 0x12bd7609'fd98c44c'34578701'9216c7af_u128},
- {Sign::POS, -127, 0x450a7ff4'7535d889'cc41ed7e'0d27aee5_u128},
- {Sign::POS, -130, 0xd3f6b844'702d636b'8326bb91'a6e7601d_u128},
- {Sign::POS, -130, 0x45b937f0'd05bb1cd'fa7b46df'314112a9_u128},
- };
-
- Float128 p = fputil::polyeval(dx, COEFFS_128[0], COEFFS_128[1], COEFFS_128[2],
- COEFFS_128[3], COEFFS_128[4], COEFFS_128[5],
- COEFFS_128[6], COEFFS_128[7]);
- return p;
-}
-
-// Compute 10^(x) using 128-bit precision.
-// TODO(lntue): investigate triple-double precision implementation for this
-// step.
-Float128 exp10_f128(double x, double kd, int idx1, int idx2) {
- double t1 = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact
- double t2 = kd * MLOG10_2_EXP2_M12_MID_32; // exact
- double t3 = kd * MLOG10_2_EXP2_M12_LO; // Error < 2^-144
-
- Float128 dx = fputil::quick_add(
- Float128(t1), fputil::quick_add(Float128(t2), Float128(t3)));
-
- // TODO: Skip recalculating exp_mid1 and exp_mid2.
- Float128 exp_mid1 =
- fputil::quick_add(Float128(EXP2_MID1[idx1].hi),
- fputil::quick_add(Float128(EXP2_MID1[idx1].mid),
- Float128(EXP2_MID1[idx1].lo)));
-
- Float128 exp_mid2 =
- fputil::quick_add(Float128(EXP2_MID2[idx2].hi),
- fputil::quick_add(Float128(EXP2_MID2[idx2].mid),
- Float128(EXP2_MID2[idx2].lo)));
-
- Float128 exp_mid = fputil::quick_mul(exp_mid1, exp_mid2);
-
- Float128 p = poly_approx_f128(dx);
-
- Float128 r = fputil::quick_mul(exp_mid, p);
-
- r.exponent += static_cast<int>(kd) >> 12;
-
- return r;
-}
-
-// Compute 10^x with double-double precision.
-DoubleDouble exp10_double_double(double x, double kd,
- const DoubleDouble &exp_mid) {
- // Recalculate dx:
- // dx = x - k * 2^-12 * log10(2)
- double t1 = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact
- double t2 = kd * MLOG10_2_EXP2_M12_MID_32; // exact
- double t3 = kd * MLOG10_2_EXP2_M12_LO; // Error < 2^-140
-
- DoubleDouble dx = fputil::exact_add(t1, t2);
- dx.lo += t3;
-
- // Degree-6 polynomial approximation in double-double precision.
- // | p - 10^x | < 2^-103.
- DoubleDouble p = poly_approx_dd(dx);
-
- // Error bounds: 2^-102.
- DoubleDouble r = fputil::quick_mult(exp_mid, p);
-
- return r;
-}
-#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-
-// When output is denormal.
-double exp10_denorm(double x) {
- // Range reduction.
- double tmp = fputil::multiply_add(x, LOG2_10, 0x1.8000'0000'4p21);
- int k = static_cast<int>(cpp::bit_cast<uint64_t>(tmp) >> 19);
- double kd = static_cast<double>(k);
-
- uint32_t idx1 = (k >> 6) & 0x3f;
- uint32_t idx2 = k & 0x3f;
-
- int hi = k >> 12;
-
- DoubleDouble exp_mid1{EXP2_MID1[idx1].mid, EXP2_MID1[idx1].hi};
- DoubleDouble exp_mid2{EXP2_MID2[idx2].mid, EXP2_MID2[idx2].hi};
- DoubleDouble exp_mid = fputil::quick_mult(exp_mid1, exp_mid2);
-
- // |dx| < 1.5 * 2^-15 + 2^-31 < 2^-14
- double lo_h = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact
- double dx = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_MID, lo_h);
-
- double mid_lo = dx * exp_mid.hi;
-
- // Approximate (10^dx - 1)/dx ~ 1 + a0*dx + a1*dx^2 + a2*dx^3 + a3*dx^4.
- double p = poly_approx_d(dx);
-
- double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
-
-#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- return ziv_test_denorm</*SKIP_ZIV_TEST=*/true>(hi, exp_mid.hi, lo, ERR_D)
- .value();
-#else
- if (auto r = ziv_test_denorm(hi, exp_mid.hi, lo, ERR_D);
- LIBC_LIKELY(r.has_value()))
- return r.value();
-
- // Use double-double
- DoubleDouble r_dd = exp10_double_double(x, kd, exp_mid);
-
- if (auto r = ziv_test_denorm(hi, r_dd.hi, r_dd.lo, ERR_DD);
- LIBC_LIKELY(r.has_value()))
- return r.value();
-
- // Use 128-bit precision
- Float128 r_f128 = exp10_f128(x, kd, idx1, idx2);
-
- return static_cast<double>(r_f128);
-#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-}
-
-// Check for exceptional cases when:
-// * log10(1 - 2^-54) < x < log10(1 + 2^-53)
-// * x >= log10(2^1024)
-// * x <= log10(2^-1022)
-// * x is inf or nan
-double set_exceptional(double x) {
- using FPBits = typename fputil::FPBits<double>;
- FPBits xbits(x);
-
- uint64_t x_u = xbits.uintval();
- uint64_t x_abs = xbits.abs().uintval();
-
- // |x| < log10(1 + 2^-53)
- if (x_abs <= 0x3c8bcb7b1526e50e) {
- // 10^(x) ~ 1 + x/2
- return fputil::multiply_add(x, 0.5, 1.0);
- }
-
- // x <= log10(2^-1022) || x >= log10(2^1024) or inf/nan.
- if (x_u >= 0xc0733a7146f72a42) {
- // x <= log10(2^-1075) or -inf/nan
- if (x_u > 0xc07439b746e36b52) {
- // exp(-Inf) = 0
- if (xbits.is_inf())
- return 0.0;
-
- // exp(nan) = nan
- if (xbits.is_nan())
- return x;
-
- if (fputil::quick_get_round() == FE_UPWARD)
- return FPBits::min_subnormal().get_val();
- fputil::set_errno_if_required(ERANGE);
- fputil::raise_except_if_required(FE_UNDERFLOW);
- return 0.0;
- }
-
- return exp10_denorm(x);
- }
-
- // x >= log10(2^1024) or +inf/nan
- // x is finite
- if (x_u < 0x7ff0'0000'0000'0000ULL) {
- int rounding = fputil::quick_get_round();
- if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
- return FPBits::max_normal().get_val();
-
- fputil::set_errno_if_required(ERANGE);
- fputil::raise_except_if_required(FE_OVERFLOW);
- }
- // x is +inf or nan
- return x + FPBits::inf().get_val();
-}
-
-} // namespace
-
-LLVM_LIBC_FUNCTION(double, exp10, (double x)) {
- using FPBits = typename fputil::FPBits<double>;
- FPBits xbits(x);
-
- uint64_t x_u = xbits.uintval();
-
- // x <= log10(2^-1022) or x >= log10(2^1024) or
- // log10(1 - 2^-54) < x < log10(1 + 2^-53).
- if (LIBC_UNLIKELY(x_u >= 0xc0733a7146f72a42 ||
- (x_u <= 0xbc7bcb7b1526e50e && x_u >= 0x40734413509f79ff) ||
- x_u < 0x3c8bcb7b1526e50e)) {
- return set_exceptional(x);
- }
-
- // Now log10(2^-1075) < x <= log10(1 - 2^-54) or
- // log10(1 + 2^-53) < x < log10(2^1024)
-
- // Range reduction:
- // Let x = log10(2) * (hi + mid1 + mid2) + lo
- // in which:
- // hi is an integer
- // mid1 * 2^6 is an integer
- // mid2 * 2^12 is an integer
- // then:
- // 10^(x) = 2^hi * 2^(mid1) * 2^(mid2) * 10^(lo).
- // With this formula:
- // - multiplying by 2^hi is exact and cheap, simply by adding the exponent
- // field.
- // - 2^(mid1) and 2^(mid2) are stored in 2 x 64-element tables.
- // - 10^(lo) ~ 1 + a0*lo + a1 * lo^2 + ...
- //
- // We compute (hi + mid1 + mid2) together by perform the rounding on
- // x * log2(10) * 2^12.
- // Since |x| < |log10(2^-1075)| < 2^9,
- // |x * 2^12| < 2^9 * 2^12 < 2^21,
- // So we can fit the rounded result round(x * 2^12) in int32_t.
- // Thus, the goal is to be able to use an additional addition and fixed width
- // shift to get an int32_t representing round(x * 2^12).
- //
- // Assuming int32_t using 2-complement representation, since the mantissa part
- // of a double precision is unsigned with the leading bit hidden, if we add an
- // extra constant C = 2^e1 + 2^e2 with e1 > e2 >= 2^23 to the product, the
- // part that are < 2^e2 in resulted mantissa of (x*2^12*L2E + C) can be
- // considered as a proper 2-complement representations of x*2^12.
- //
- // One small problem with this approach is that the sum (x*2^12 + C) in
- // double precision is rounded to the least significant bit of the dorminant
- // factor C. In order to minimize the rounding errors from this addition, we
- // want to minimize e1. Another constraint that we want is that after
- // shifting the mantissa so that the least significant bit of int32_t
- // corresponds to the unit bit of (x*2^12*L2E), the sign is correct without
- // any adjustment. So combining these 2 requirements, we can choose
- // C = 2^33 + 2^32, so that the sign bit corresponds to 2^31 bit, and hence
- // after right shifting the mantissa, the resulting int32_t has correct sign.
- // With this choice of C, the number of mantissa bits we need to shift to the
- // right is: 52 - 33 = 19.
- //
- // Moreover, since the integer right shifts are equivalent to rounding down,
- // we can add an extra 0.5 so that it will become round-to-nearest, tie-to-
- // +infinity. So in particular, we can compute:
- // hmm = x * 2^12 + C,
- // where C = 2^33 + 2^32 + 2^-1, then if
- // k = int32_t(lower 51 bits of double(x * 2^12 + C) >> 19),
- // the reduced argument:
- // lo = x - log10(2) * 2^-12 * k is bounded by:
- // |lo| = |x - log10(2) * 2^-12 * k|
- // = log10(2) * 2^-12 * | x * log2(10) * 2^12 - k |
- // <= log10(2) * 2^-12 * (2^-1 + 2^-19)
- // < 1.5 * 2^-2 * (2^-13 + 2^-31)
- // = 1.5 * (2^-15 * 2^-31)
- //
- // Finally, notice that k only uses the mantissa of x * 2^12, so the
- // exponent 2^12 is not needed. So we can simply define
- // C = 2^(33 - 12) + 2^(32 - 12) + 2^(-13 - 12), and
- // k = int32_t(lower 51 bits of double(x + C) >> 19).
-
- // Rounding errors <= 2^-31.
- double tmp = fputil::multiply_add(x, LOG2_10, 0x1.8000'0000'4p21);
- int k = static_cast<int>(cpp::bit_cast<uint64_t>(tmp) >> 19);
- double kd = static_cast<double>(k);
-
- uint32_t idx1 = (k >> 6) & 0x3f;
- uint32_t idx2 = k & 0x3f;
-
- int hi = k >> 12;
-
- DoubleDouble exp_mid1{EXP2_MID1[idx1].mid, EXP2_MID1[idx1].hi};
- DoubleDouble exp_mid2{EXP2_MID2[idx2].mid, EXP2_MID2[idx2].hi};
- DoubleDouble exp_mid = fputil::quick_mult(exp_mid1, exp_mid2);
-
- // |dx| < 1.5 * 2^-15 + 2^-31 < 2^-14
- double lo_h = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact
- double dx = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_MID, lo_h);
-
- // We use the degree-4 polynomial to approximate 10^(lo):
- // 10^(lo) ~ 1 + a0 * lo + a1 * lo^2 + a2 * lo^3 + a3 * lo^4
- // = 1 + lo * P(lo)
- // So that the errors are bounded by:
- // |P(lo) - (10^lo - 1)/lo| < |lo|^4 / 64 < 2^(-13 * 4) / 64 = 2^-58
- // Let P_ be an evaluation of P where all intermediate computations are in
- // double precision. Using either Horner's or Estrin's schemes, the evaluated
- // errors can be bounded by:
- // |P_(lo) - P(lo)| < 2^-51
- // => |lo * P_(lo) - (2^lo - 1) | < 2^-65
- // => 2^(mid1 + mid2) * |lo * P_(lo) - expm1(lo)| < 2^-64.
- // Since we approximate
- // 2^(mid1 + mid2) ~ exp_mid.hi + exp_mid.lo,
- // We use the expression:
- // (exp_mid.hi + exp_mid.lo) * (1 + dx * P_(dx)) ~
- // ~ exp_mid.hi + (exp_mid.hi * dx * P_(dx) + exp_mid.lo)
- // with errors bounded by 2^-64.
-
- double mid_lo = dx * exp_mid.hi;
-
- // Approximate (10^dx - 1)/dx ~ 1 + a0*dx + a1*dx^2 + a2*dx^3 + a3*dx^4.
- double p = poly_approx_d(dx);
-
- double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
-
-#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
- int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
- double r =
- cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(exp_mid.hi + lo));
- return r;
-#else
- double upper = exp_mid.hi + (lo + ERR_D);
- double lower = exp_mid.hi + (lo - ERR_D);
-
- if (LIBC_LIKELY(upper == lower)) {
- // To multiply by 2^hi, a fast way is to simply add hi to the exponent
- // field.
- int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
- double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper));
- return r;
- }
-
- // Exact outputs when x = 1, 2, ..., 22 + hard to round with x = 23.
- // Quick check mask: 0x800f'ffffU = ~(bits of 1.0 | ... | bits of 23.0)
- if (LIBC_UNLIKELY((x_u & 0x8000'ffff'ffff'ffffULL) == 0ULL)) {
- switch (x_u) {
- case 0x3ff0000000000000: // x = 1.0
- return 10.0;
- case 0x4000000000000000: // x = 2.0
- return 100.0;
- case 0x4008000000000000: // x = 3.0
- return 1'000.0;
- case 0x4010000000000000: // x = 4.0
- return 10'000.0;
- case 0x4014000000000000: // x = 5.0
- return 100'000.0;
- case 0x4018000000000000: // x = 6.0
- return 1'000'000.0;
- case 0x401c000000000000: // x = 7.0
- return 10'000'000.0;
- case 0x4020000000000000: // x = 8.0
- return 100'000'000.0;
- case 0x4022000000000000: // x = 9.0
- return 1'000'000'000.0;
- case 0x4024000000000000: // x = 10.0
- return 10'000'000'000.0;
- case 0x4026000000000000: // x = 11.0
- return 100'000'000'000.0;
- case 0x4028000000000000: // x = 12.0
- return 1'000'000'000'000.0;
- case 0x402a000000000000: // x = 13.0
- return 10'000'000'000'000.0;
- case 0x402c000000000000: // x = 14.0
- return 100'000'000'000'000.0;
- case 0x402e000000000000: // x = 15.0
- return 1'000'000'000'000'000.0;
- case 0x4030000000000000: // x = 16.0
- return 10'000'000'000'000'000.0;
- case 0x4031000000000000: // x = 17.0
- return 100'000'000'000'000'000.0;
- case 0x4032000000000000: // x = 18.0
- return 1'000'000'000'000'000'000.0;
- case 0x4033000000000000: // x = 19.0
- return 10'000'000'000'000'000'000.0;
- case 0x4034000000000000: // x = 20.0
- return 100'000'000'000'000'000'000.0;
- case 0x4035000000000000: // x = 21.0
- return 1'000'000'000'000'000'000'000.0;
- case 0x4036000000000000: // x = 22.0
- return 10'000'000'000'000'000'000'000.0;
- case 0x4037000000000000: // x = 23.0
- return 0x1.52d02c7e14af6p76 + x;
- }
- }
-
- // Use double-double
- DoubleDouble r_dd = exp10_double_double(x, kd, exp_mid);
-
- double upper_dd = r_dd.hi + (r_dd.lo + ERR_DD);
- double lower_dd = r_dd.hi + (r_dd.lo - ERR_DD);
-
- if (LIBC_LIKELY(upper_dd == lower_dd)) {
- // To multiply by 2^hi, a fast way is to simply add hi to the exponent
- // field.
- int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
- double r = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(upper_dd));
- return r;
- }
-
- // Use 128-bit precision
- Float128 r_f128 = exp10_f128(x, kd, idx1, idx2);
-
- return static_cast<double>(r_f128);
-#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-}
+LLVM_LIBC_FUNCTION(double, exp10, (double x)) { return math::exp10(x); }
} // namespace LIBC_NAMESPACE_DECL
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index fe843d3207ceb..8b60ca13562f6 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2245,6 +2245,24 @@ libc_support_library(
],
)
+libc_support_library(
+ name = "__support_math_exp10",
+ hdrs = ["src/__support/math/exp10.h"],
+ deps = [
+ ":__support_math_exp_constants",
+ ":__support_math_exp_utils",
+ ":__support_fputil_double_double",
+ ":__support_fputil_dyadic_float",
+ ":__support_fputil_multiply_add",
+ ":__support_fputil_nearest_integer",
+ ":__support_fputil_polyeval",
+ ":__support_fputil_rounding_mode",
+ ":__support_fputil_triple_double",
+ ":__support_integer_literals",
+ ":__support_macros_optimization",
+ ],
+)
+
############################### complex targets ################################
libc_function(
@@ -2849,17 +2867,8 @@ libc_math_function(
libc_math_function(
name = "exp10",
additional_deps = [
- ":__support_fputil_double_double",
- ":__support_fputil_dyadic_float",
- ":__support_fputil_multiply_add",
- ":__support_fputil_nearest_integer",
- ":__support_fputil_polyeval",
- ":__support_fputil_rounding_mode",
- ":__support_fputil_triple_double",
- ":__support_integer_literals",
- ":__support_macros_optimization",
- ":common_constants",
- ":explogxf",
+ ":__support_math_exp10",
+ ":errno",
],
)
>From c22217a0089373abe39eeb702773d08a59d84550 Mon Sep 17 00:00:00 2001
From: bassiounix <muhammad.m.bassiouni at gmail.com>
Date: Wed, 16 Jul 2025 20:01:48 +0300
Subject: [PATCH 4/9] fix gcc problems
---
libc/src/__support/FPUtil/PolyEval.h | 2 +-
libc/src/__support/math/exp10.h | 8 ++++----
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/libc/src/__support/FPUtil/PolyEval.h b/libc/src/__support/FPUtil/PolyEval.h
index 41104620ed61d..7bec4e30a9960 100644
--- a/libc/src/__support/FPUtil/PolyEval.h
+++ b/libc/src/__support/FPUtil/PolyEval.h
@@ -37,7 +37,7 @@ LIBC_INLINE cpp::enable_if_t<(sizeof(T) <= sizeof(void *)), T> polyeval(T,
}
template <typename T, typename... Ts>
-LIBC_INLINE cpp::enable_if_t<(sizeof(T) > sizeof(void *)), T>
+LIBC_INLINE static constexpr cpp::enable_if_t<(sizeof(T) > sizeof(void *)), T>
polyeval(const T &x, const T &a0, const Ts &...a) {
return multiply_add(x, polyeval(x, a...), a0);
}
diff --git a/libc/src/__support/math/exp10.h b/libc/src/__support/math/exp10.h
index da94281c0c745..03b04239a874a 100644
--- a/libc/src/__support/math/exp10.h
+++ b/libc/src/__support/math/exp10.h
@@ -68,7 +68,7 @@ namespace {
// > P;
// Error bounds:
// | output - (10^dx - 1) / dx | < 2^-52.
-LIBC_INLINE static constexpr double poly_approx_d(double dx) {
+LIBC_INLINE static double poly_approx_d(double dx) {
// dx^2
double dx2 = dx * dx;
double c0 =
@@ -128,7 +128,7 @@ static constexpr Float128 poly_approx_f128(const Float128 &dx) {
// Compute 10^(x) using 128-bit precision.
// TODO(lntue): investigate triple-double precision implementation for this
// step.
-static constexpr Float128 exp10_f128(double x, double kd, int idx1, int idx2) {
+static Float128 exp10_f128(double x, double kd, int idx1, int idx2) {
double t1 = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact
double t2 = kd * MLOG10_2_EXP2_M12_MID_32; // exact
double t3 = kd * MLOG10_2_EXP2_M12_LO; // Error < 2^-144
@@ -159,7 +159,7 @@ static constexpr Float128 exp10_f128(double x, double kd, int idx1, int idx2) {
}
// Compute 10^x with double-double precision.
-static constexpr DoubleDouble exp10_double_double(double x, double kd,
+static DoubleDouble exp10_double_double(double x, double kd,
const DoubleDouble &exp_mid) {
// Recalculate dx:
// dx = x - k * 2^-12 * log10(2)
@@ -182,7 +182,7 @@ static constexpr DoubleDouble exp10_double_double(double x, double kd,
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// When output is denormal.
-static constexpr double exp10_denorm(double x) {
+static double exp10_denorm(double x) {
// Range reduction.
double tmp = fputil::multiply_add(x, LOG2_10, 0x1.8000'0000'4p21);
int k = static_cast<int>(cpp::bit_cast<uint64_t>(tmp) >> 19);
>From bcf433b41583ff0d8439e219730eae76503b7cd2 Mon Sep 17 00:00:00 2001
From: bassiounix <muhammad.m.bassiouni at gmail.com>
Date: Wed, 16 Jul 2025 20:06:56 +0300
Subject: [PATCH 5/9] fix code style
---
libc/src/__support/math/exp10.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libc/src/__support/math/exp10.h b/libc/src/__support/math/exp10.h
index 03b04239a874a..18f8beaa4c2cc 100644
--- a/libc/src/__support/math/exp10.h
+++ b/libc/src/__support/math/exp10.h
@@ -160,7 +160,7 @@ static Float128 exp10_f128(double x, double kd, int idx1, int idx2) {
// Compute 10^x with double-double precision.
static DoubleDouble exp10_double_double(double x, double kd,
- const DoubleDouble &exp_mid) {
+ const DoubleDouble &exp_mid) {
// Recalculate dx:
// dx = x - k * 2^-12 * log10(2)
double t1 = fputil::multiply_add(kd, MLOG10_2_EXP2_M12_HI, x); // exact
>From ba1c1d1e26cd90480f92d91bb79142ef407a7b7f Mon Sep 17 00:00:00 2001
From: bassiounix <muhammad.m.bassiouni at gmail.com>
Date: Wed, 16 Jul 2025 22:02:22 +0300
Subject: [PATCH 6/9] prefix utilities with exp10
---
libc/src/__support/math/exp10.h | 34 +++++++++++++++------------------
1 file changed, 15 insertions(+), 19 deletions(-)
diff --git a/libc/src/__support/math/exp10.h b/libc/src/__support/math/exp10.h
index 18f8beaa4c2cc..88748523deb3d 100644
--- a/libc/src/__support/math/exp10.h
+++ b/libc/src/__support/math/exp10.h
@@ -36,7 +36,7 @@ using Float128 = typename fputil::DyadicFloat<128>;
using LIBC_NAMESPACE::operator""_u128;
// log2(10)
-constexpr double LOG2_10 = 0x1.a934f0979a371p+1;
+static constexpr double LOG2_10 = 0x1.a934f0979a371p+1;
// -2^-12 * log10(2)
// > a = -2^-12 * log10(2);
@@ -44,12 +44,12 @@ constexpr double LOG2_10 = 0x1.a934f0979a371p+1;
// > c = round(a - b, 32, RN);
// > d = round(a - b - c, D, RN);
// Errors < 1.5 * 2^-144
-constexpr double MLOG10_2_EXP2_M12_HI = -0x1.3441350ap-14;
-constexpr double MLOG10_2_EXP2_M12_MID = 0x1.0c0219dc1da99p-51;
+static constexpr double MLOG10_2_EXP2_M12_HI = -0x1.3441350ap-14;
+static constexpr double MLOG10_2_EXP2_M12_MID = 0x1.0c0219dc1da99p-51;
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-constexpr double MLOG10_2_EXP2_M12_MID_32 = 0x1.0c0219dcp-51;
-constexpr double MLOG10_2_EXP2_M12_LO = 0x1.da994fd20dba2p-87;
+static constexpr double MLOG10_2_EXP2_M12_MID_32 = 0x1.0c0219dcp-51;
+static constexpr double MLOG10_2_EXP2_M12_LO = 0x1.da994fd20dba2p-87;
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// Error bounds:
@@ -58,17 +58,15 @@ constexpr double ERR_D = 0x1.8p-63;
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// Errors when using double-double precision.
-constexpr double ERR_DD = 0x1.8p-99;
+static constexpr double ERR_DD = 0x1.8p-99;
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
-namespace {
-
// Polynomial approximations with double precision. Generated by Sollya with:
// > P = fpminimax((10^x - 1)/x, 3, [|D...|], [-2^-14, 2^-14]);
// > P;
// Error bounds:
// | output - (10^dx - 1) / dx | < 2^-52.
-LIBC_INLINE static double poly_approx_d(double dx) {
+LIBC_INLINE static double exp10_poly_approx_d(double dx) {
// dx^2
double dx2 = dx * dx;
double c0 =
@@ -85,7 +83,7 @@ LIBC_INLINE static double poly_approx_d(double dx) {
// > P = fpminimax((10^x - 1)/x, 5, [|DD...|], [-2^-14, 2^-14]);
// Error bounds:
// | output - 10^(dx) | < 2^-101
-static constexpr DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
+static constexpr DoubleDouble exp10_poly_approx_dd(const DoubleDouble &dx) {
// Taylor polynomial.
constexpr DoubleDouble COEFFS[] = {
{0, 0x1p0},
@@ -107,7 +105,7 @@ static constexpr DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
// Return exp(dx) ~ 1 + a0 * dx + a1 * dx^2 + ... + a6 * dx^7
// For |dx| < 2^-14:
// | output - 10^dx | < 1.5 * 2^-124.
-static constexpr Float128 poly_approx_f128(const Float128 &dx) {
+static constexpr Float128 exp10_poly_approx_f128(const Float128 &dx) {
constexpr Float128 COEFFS_128[]{
{Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
{Sign::POS, -126, 0x935d8ddd'aaa8ac16'ea56d62b'82d30a2d_u128},
@@ -149,7 +147,7 @@ static Float128 exp10_f128(double x, double kd, int idx1, int idx2) {
Float128 exp_mid = fputil::quick_mul(exp_mid1, exp_mid2);
- Float128 p = poly_approx_f128(dx);
+ Float128 p = exp10_poly_approx_f128(dx);
Float128 r = fputil::quick_mul(exp_mid, p);
@@ -172,7 +170,7 @@ static DoubleDouble exp10_double_double(double x, double kd,
// Degree-6 polynomial approximation in double-double precision.
// | p - 10^x | < 2^-103.
- DoubleDouble p = poly_approx_dd(dx);
+ DoubleDouble p = exp10_poly_approx_dd(dx);
// Error bounds: 2^-102.
DoubleDouble r = fputil::quick_mult(exp_mid, p);
@@ -204,7 +202,7 @@ static double exp10_denorm(double x) {
double mid_lo = dx * exp_mid.hi;
// Approximate (10^dx - 1)/dx ~ 1 + a0*dx + a1*dx^2 + a2*dx^3 + a3*dx^4.
- double p = poly_approx_d(dx);
+ double p = exp10_poly_approx_d(dx);
double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
@@ -235,7 +233,7 @@ static double exp10_denorm(double x) {
// * x >= log10(2^1024)
// * x <= log10(2^-1022)
// * x is inf or nan
-static constexpr double set_exceptional(double x) {
+static constexpr double exp10_set_exceptional(double x) {
using FPBits = typename fputil::FPBits<double>;
FPBits xbits(x);
@@ -284,8 +282,6 @@ static constexpr double set_exceptional(double x) {
return x + FPBits::inf().get_val();
}
-} // namespace
-
namespace math {
static constexpr double exp10(double x) {
@@ -299,7 +295,7 @@ static constexpr double exp10(double x) {
if (LIBC_UNLIKELY(x_u >= 0xc0733a7146f72a42 ||
(x_u <= 0xbc7bcb7b1526e50e && x_u >= 0x40734413509f79ff) ||
x_u < 0x3c8bcb7b1526e50e)) {
- return set_exceptional(x);
+ return exp10_set_exceptional(x);
}
// Now log10(2^-1075) < x <= log10(1 - 2^-54) or
@@ -403,7 +399,7 @@ static constexpr double exp10(double x) {
double mid_lo = dx * exp_mid.hi;
// Approximate (10^dx - 1)/dx ~ 1 + a0*dx + a1*dx^2 + a2*dx^3 + a3*dx^4.
- double p = poly_approx_d(dx);
+ double p = exp10_poly_approx_d(dx);
double lo = fputil::multiply_add(p, mid_lo, exp_mid.lo);
>From bfb7bf2822764a7b7025fca2e7b255e3afa3ce79 Mon Sep 17 00:00:00 2001
From: bassiounix <muhammad.m.bassiouni at gmail.com>
Date: Fri, 11 Jul 2025 03:12:38 +0300
Subject: [PATCH 7/9] [libc][math] Refactor exp implementation to header-only
in src/__support/math folder.
---
libc/shared/math.h | 1 +
libc/src/math/generic/common_constants.h | 2 ++
2 files changed, 3 insertions(+)
diff --git a/libc/shared/math.h b/libc/shared/math.h
index b37aa46820523..ac422f36bb2bb 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -21,5 +21,6 @@
#include "math/ldexpf.h"
#include "math/ldexpf128.h"
#include "math/ldexpf16.h"
+#include "math/exp.h"
#endif // LLVM_LIBC_SHARED_MATH_H
diff --git a/libc/src/math/generic/common_constants.h b/libc/src/math/generic/common_constants.h
index 291816a7889ad..a1d6de3c87240 100644
--- a/libc/src/math/generic/common_constants.h
+++ b/libc/src/math/generic/common_constants.h
@@ -13,6 +13,8 @@
#include "src/__support/macros/config.h"
#include "src/__support/math/exp_constants.h"
#include "src/__support/number_pair.h"
+#include "src/__support/math/exp_constants.h"
+
namespace LIBC_NAMESPACE_DECL {
>From 2998536140e3daa527b69388113f42824f4520bd Mon Sep 17 00:00:00 2001
From: bassiounix <muhammad.m.bassiouni at gmail.com>
Date: Fri, 11 Jul 2025 03:19:04 +0300
Subject: [PATCH 8/9] fix style
---
libc/shared/math.h | 1 -
libc/src/math/generic/common_constants.h | 2 +-
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/libc/shared/math.h b/libc/shared/math.h
index ac422f36bb2bb..b37aa46820523 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -21,6 +21,5 @@
#include "math/ldexpf.h"
#include "math/ldexpf128.h"
#include "math/ldexpf16.h"
-#include "math/exp.h"
#endif // LLVM_LIBC_SHARED_MATH_H
diff --git a/libc/src/math/generic/common_constants.h b/libc/src/math/generic/common_constants.h
index a1d6de3c87240..80eee06b2d4d9 100644
--- a/libc/src/math/generic/common_constants.h
+++ b/libc/src/math/generic/common_constants.h
@@ -14,7 +14,7 @@
#include "src/__support/math/exp_constants.h"
#include "src/__support/number_pair.h"
#include "src/__support/math/exp_constants.h"
-
+#include "src/__support/number_pair.h"
namespace LIBC_NAMESPACE_DECL {
>From 3fb72ddb3ebc007e42f25f5d7472c1f245e036c4 Mon Sep 17 00:00:00 2001
From: bassiounix <muhammad.m.bassiouni at gmail.com>
Date: Sun, 13 Jul 2025 04:42:34 +0300
Subject: [PATCH 9/9] [libc][math] Refactor exp10f implementation to
header-only in src/__support/math folder.
---
libc/shared/math.h | 1 +
libc/shared/math/exp10f.h | 24 +++
libc/src/__support/math/CMakeLists.txt | 28 +++
.../exp10f_impl.h => __support/math/exp10f.h} | 17 +-
libc/src/__support/math/exp10f_utils.h | 171 ++++++++++++++++++
libc/src/math/generic/CMakeLists.txt | 33 +---
libc/src/math/generic/atanhf.cpp | 1 +
libc/src/math/generic/coshf.cpp | 2 +-
libc/src/math/generic/exp10f.cpp | 7 +-
libc/src/math/generic/explogxf.h | 156 +---------------
libc/src/math/generic/powf.cpp | 7 +-
libc/src/math/generic/sinhf.cpp | 1 +
.../llvm-project-overlay/libc/BUILD.bazel | 53 ++++--
13 files changed, 281 insertions(+), 220 deletions(-)
create mode 100644 libc/shared/math/exp10f.h
rename libc/src/{math/generic/exp10f_impl.h => __support/math/exp10f.h} (91%)
create mode 100644 libc/src/__support/math/exp10f_utils.h
diff --git a/libc/shared/math.h b/libc/shared/math.h
index b37aa46820523..2ae7c1d58ae10 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -13,6 +13,7 @@
#include "math/exp.h"
#include "math/exp10.h"
+#include "math/exp10f.h"
#include "math/expf.h"
#include "math/expf16.h"
#include "math/frexpf.h"
diff --git a/libc/shared/math/exp10f.h b/libc/shared/math/exp10f.h
new file mode 100644
index 0000000000000..556e78ab3b7a7
--- /dev/null
+++ b/libc/shared/math/exp10f.h
@@ -0,0 +1,24 @@
+//===-- Shared exp10f function -----------------------------------*- C++
+//-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_EXP10F_H
+#define LLVM_LIBC_SHARED_MATH_EXP10F_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/exp10f.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::exp10f;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_EXP10F_H
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 0bfc996c44fc8..ad36679409f89 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -170,3 +170,31 @@ add_header_library(
libc.src.__support.integer_literals
libc.src.__support.macros.optimization
)
+
+add_header_library(
+ exp10f_utils
+ HDRS
+ exp10f_utils.h
+ DEPENDS
+ libc.src.__support.FPUtil.basic_operations
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.nearest_integer
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.common
+ libc.src.__support.math.exp_utils
+)
+
+add_header_library(
+ exp10f
+ HDRS
+ exp10f.h
+ DEPENDS
+ .exp10f_utils
+ libc.src.__support.macros.config
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.rounding_mode
+ libc.src.__support.macros.optimization
+)
diff --git a/libc/src/math/generic/exp10f_impl.h b/libc/src/__support/math/exp10f.h
similarity index 91%
rename from libc/src/math/generic/exp10f_impl.h
rename to libc/src/__support/math/exp10f.h
index 975fd01a0a25c..807b4f0d6c109 100644
--- a/libc/src/math/generic/exp10f_impl.h
+++ b/libc/src/__support/math/exp10f.h
@@ -1,4 +1,4 @@
-//===-- Single-precision 10^x function ------------------------------------===//
+//===-- Implementation header for exp10f ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,22 +6,21 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXP10F_IMPL_H
-#define LLVM_LIBC_SRC_MATH_GENERIC_EXP10F_IMPL_H
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F_H
-#include "explogxf.h"
+#include "exp10f_utils.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/rounding_mode.h"
-#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
namespace LIBC_NAMESPACE_DECL {
-namespace generic {
+namespace math {
-LIBC_INLINE float exp10f(float x) {
+static constexpr float exp10f(float x) {
using FPBits = typename fputil::FPBits<float>;
FPBits xbits(x);
@@ -132,7 +131,7 @@ LIBC_INLINE float exp10f(float x) {
return static_cast<float>(multiply_add(p, lo2 * rr.mh, c0 * rr.mh));
}
-} // namespace generic
+} // namespace math
} // namespace LIBC_NAMESPACE_DECL
-#endif // LLVM_LIBC_SRC_MATH_GENERIC_EXP10F_IMPL_H
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10F_H
diff --git a/libc/src/__support/math/exp10f_utils.h b/libc/src/__support/math/exp10f_utils.h
new file mode 100644
index 0000000000000..c491872cc725c
--- /dev/null
+++ b/libc/src/__support/math/exp10f_utils.h
@@ -0,0 +1,171 @@
+//===-- Common utils for exp10f ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP_FLOAT_CONSTANTS_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP_FLOAT_CONSTANTS_H
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+struct ExpBase {
+ // Base = e
+ static constexpr int MID_BITS = 5;
+ static constexpr int MID_MASK = (1 << MID_BITS) - 1;
+ // log2(e) * 2^5
+ static constexpr double LOG2_B = 0x1.71547652b82fep+0 * (1 << MID_BITS);
+ // High and low parts of -log(2) * 2^(-5)
+ static constexpr double M_LOGB_2_HI = -0x1.62e42fefa0000p-1 / (1 << MID_BITS);
+ static constexpr double M_LOGB_2_LO =
+ -0x1.cf79abc9e3b3ap-40 / (1 << MID_BITS);
+ // Look up table for bit fields of 2^(i/32) for i = 0..31, generated by Sollya
+ // with:
+ // > for i from 0 to 31 do printdouble(round(2^(i/32), D, RN));
+ static constexpr int64_t EXP_2_MID[1 << MID_BITS] = {
+ 0x3ff0000000000000, 0x3ff059b0d3158574, 0x3ff0b5586cf9890f,
+ 0x3ff11301d0125b51, 0x3ff172b83c7d517b, 0x3ff1d4873168b9aa,
+ 0x3ff2387a6e756238, 0x3ff29e9df51fdee1, 0x3ff306fe0a31b715,
+ 0x3ff371a7373aa9cb, 0x3ff3dea64c123422, 0x3ff44e086061892d,
+ 0x3ff4bfdad5362a27, 0x3ff5342b569d4f82, 0x3ff5ab07dd485429,
+ 0x3ff6247eb03a5585, 0x3ff6a09e667f3bcd, 0x3ff71f75e8ec5f74,
+ 0x3ff7a11473eb0187, 0x3ff82589994cce13, 0x3ff8ace5422aa0db,
+ 0x3ff93737b0cdc5e5, 0x3ff9c49182a3f090, 0x3ffa5503b23e255d,
+ 0x3ffae89f995ad3ad, 0x3ffb7f76f2fb5e47, 0x3ffc199bdd85529c,
+ 0x3ffcb720dcef9069, 0x3ffd5818dcfba487, 0x3ffdfc97337b9b5f,
+ 0x3ffea4afa2a490da, 0x3fff50765b6e4540,
+ };
+
+ // Approximating e^dx with degree-5 minimax polynomial generated by Sollya:
+ // > Q = fpminimax(expm1(x)/x, 4, [|1, D...|], [-log(2)/64, log(2)/64]);
+ // Then:
+ // e^dx ~ P(dx) = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[3] * dx^5.
+ static constexpr double COEFFS[4] = {
+ 0x1.ffffffffe5bc8p-2, 0x1.555555555cd67p-3, 0x1.5555c2a9b48b4p-5,
+ 0x1.11112a0e34bdbp-7};
+
+ LIBC_INLINE static double powb_lo(double dx) {
+ using fputil::multiply_add;
+ double dx2 = dx * dx;
+ double c0 = 1.0 + dx;
+ // c1 = COEFFS[0] + COEFFS[1] * dx
+ double c1 = multiply_add(dx, ExpBase::COEFFS[1], ExpBase::COEFFS[0]);
+ // c2 = COEFFS[2] + COEFFS[3] * dx
+ double c2 = multiply_add(dx, ExpBase::COEFFS[3], ExpBase::COEFFS[2]);
+ // r = c4 + c5 * dx^4
+ // = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[5] * dx^7
+ return fputil::polyeval(dx2, c0, c1, c2);
+ }
+};
+
+struct Exp10Base : public ExpBase {
+ // log2(10) * 2^5
+ static constexpr double LOG2_B = 0x1.a934f0979a371p1 * (1 << MID_BITS);
+ // High and low parts of -log10(2) * 2^(-5).
+ // Notice that since |x * log2(10)| < 150:
+ // |k| = |round(x * log2(10) * 2^5)| < 2^8 * 2^5 = 2^13
+ // So when the FMA instructions are not available, in order for the product
+ // k * M_LOGB_2_HI
+ // to be exact, we only store the high part of log10(2) up to 38 bits
+ // (= 53 - 15) of precision.
+ // It is generated by Sollya with:
+ // > round(log10(2), 44, RN);
+ static constexpr double M_LOGB_2_HI = -0x1.34413509f8p-2 / (1 << MID_BITS);
+ // > round(log10(2) - 0x1.34413509f8p-2, D, RN);
+ static constexpr double M_LOGB_2_LO = 0x1.80433b83b532ap-44 / (1 << MID_BITS);
+
+ // Approximating 10^dx with degree-5 minimax polynomial generated by Sollya:
+ // > Q = fpminimax((10^x - 1)/x, 4, [|D...|], [-log10(2)/2^6, log10(2)/2^6]);
+ // Then:
+ // 10^dx ~ P(dx) = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5.
+ static constexpr double COEFFS[5] = {0x1.26bb1bbb55515p1, 0x1.53524c73bd3eap1,
+ 0x1.0470591dff149p1, 0x1.2bd7c0a9fbc4dp0,
+ 0x1.1429e74a98f43p-1};
+
+ static double powb_lo(double dx) {
+ using fputil::multiply_add;
+ double dx2 = dx * dx;
+ // c0 = 1 + COEFFS[0] * dx
+ double c0 = multiply_add(dx, Exp10Base::COEFFS[0], 1.0);
+ // c1 = COEFFS[1] + COEFFS[2] * dx
+ double c1 = multiply_add(dx, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]);
+ // c2 = COEFFS[3] + COEFFS[4] * dx
+ double c2 = multiply_add(dx, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]);
+ // r = c0 + dx^2 * (c1 + c2 * dx^2)
+ // = c0 + c1 * dx^2 + c2 * dx^4
+ // = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5.
+ return fputil::polyeval(dx2, c0, c1, c2);
+ }
+};
+
+constexpr int LOG_P1_BITS = 6;
+constexpr int LOG_P1_SIZE = 1 << LOG_P1_BITS;
+
+// N[Table[Log[2, 1 + x], {x, 0/64, 63/64, 1/64}], 40]
+extern const double LOG_P1_LOG2[LOG_P1_SIZE];
+
+// N[Table[1/(1 + x), {x, 0/64, 63/64, 1/64}], 40]
+extern const double LOG_P1_1_OVER[LOG_P1_SIZE];
+
+// Taylor series expansion for Log[2, 1 + x] splitted to EVEN AND ODD numbers
+// K_LOG2_ODD starts from x^3
+extern const double K_LOG2_ODD[4];
+extern const double K_LOG2_EVEN[4];
+
+// Output of range reduction for exp_b: (2^(mid + hi), lo)
+// where:
+// b^x = 2^(mid + hi) * b^lo
+struct exp_b_reduc_t {
+ double mh; // 2^(mid + hi)
+ double lo;
+};
+
+// The function correctly calculates b^x value with at least float precision
+// in a limited range.
+// Range reduction:
+// b^x = 2^(hi + mid) * b^lo
+// where:
+// x = (hi + mid) * log_b(2) + lo
+// hi is an integer,
+// 0 <= mid * 2^MID_BITS < 2^MID_BITS is an integer
+// -2^(-MID_BITS - 1) <= lo * log2(b) <= 2^(-MID_BITS - 1)
+// Base class needs to provide the following constants:
+// - MID_BITS : number of bits after decimal points used for mid
+// - MID_MASK : 2^MID_BITS - 1, mask to extract mid bits
+// - LOG2_B : log2(b) * 2^MID_BITS for scaling
+// - M_LOGB_2_HI : high part of -log_b(2) * 2^(-MID_BITS)
+// - M_LOGB_2_LO : low part of -log_b(2) * 2^(-MID_BITS)
+// - EXP_2_MID : look up table for bit fields of 2^mid
+// Return:
+// { 2^(hi + mid), lo }
+template <class Base>
+LIBC_INLINE static constexpr exp_b_reduc_t exp_b_range_reduc(float x) {
+ double xd = static_cast<double>(x);
+ // kd = round((hi + mid) * log2(b) * 2^MID_BITS)
+ double kd = fputil::nearest_integer(Base::LOG2_B * xd);
+ // k = round((hi + mid) * log2(b) * 2^MID_BITS)
+ int k = static_cast<int>(kd);
+ // hi = floor(kd * 2^(-MID_BITS))
+ // exp_hi = shift hi to the exponent field of double precision.
+ uint64_t exp_hi = static_cast<uint64_t>(k >> Base::MID_BITS)
+ << fputil::FPBits<double>::FRACTION_LEN;
+ // mh = 2^hi * 2^mid
+ // mh_bits = bit field of mh
+ uint64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi;
+ double mh = fputil::FPBits<double>(mh_bits).get_val();
+ // dx = lo = x - (hi + mid) * log(2)
+ double dx = fputil::multiply_add(
+ kd, Base::M_LOGB_2_LO, fputil::multiply_add(kd, Base::M_LOGB_2_HI, xd));
+ return {mh, dx};
+}
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP_FLOAT_CONSTANTS_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 352c2ad4ab22a..ebdd3d7235141 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -358,7 +358,6 @@ add_entrypoint_object(
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.except_value_utils
libc.src.__support.FPUtil.fma
- libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.polyeval
libc.src.__support.macros.optimization
)
@@ -448,7 +447,6 @@ add_entrypoint_object(
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.fma
- libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.polyeval
libc.src.__support.FPUtil.rounding_mode
libc.src.__support.macros.optimization
@@ -1461,21 +1459,6 @@ add_entrypoint_object(
libc.src.errno.errno
)
-add_header_library(
- exp10f_impl
- HDRS
- exp10f_impl.h
- DEPENDS
- .explogxf
- libc.src.__support.FPUtil.fenv_impl
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.rounding_mode
- libc.src.__support.macros.optimization
- libc.src.__support.common
- libc.src.errno.errno
-)
-
add_entrypoint_object(
exp10f
SRCS
@@ -1483,7 +1466,8 @@ add_entrypoint_object(
HDRS
../exp10f.h
DEPENDS
- .exp10f_impl
+ libc.src.__support.math.exp10f
+ libc.src.errno.errno
)
add_entrypoint_object(
@@ -1620,17 +1604,15 @@ add_entrypoint_object(
../powf.h
DEPENDS
.common_constants
- .exp10f_impl
.exp2f_impl
.explogxf
+ libc.src.__support.math.exp10f
libc.src.__support.CPP.bit
- libc.src.__support.CPP.optional
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.nearest_integer
libc.src.__support.FPUtil.polyeval
- libc.src.__support.FPUtil.rounding_mode
libc.src.__support.FPUtil.sqrt
libc.src.__support.FPUtil.triple_double
libc.src.__support.macros.optimization
@@ -3792,13 +3774,9 @@ add_object_library(
explogxf.cpp
DEPENDS
.common_constants
- libc.src.__support.FPUtil.basic_operations
- libc.src.__support.FPUtil.fenv_impl
- libc.src.__support.FPUtil.multiply_add
- libc.src.__support.FPUtil.nearest_integer
- libc.src.__support.FPUtil.polyeval
- libc.src.__support.common
libc.src.__support.math.exp_utils
+ libc.src.__support.math.exp10f_utils
+ libc.src.__support.macros.properties.cpu_features
libc.src.errno.errno
)
@@ -3981,6 +3959,7 @@ add_entrypoint_object(
DEPENDS
.explogxf
libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.fenv_impl
libc.src.__support.macros.optimization
)
diff --git a/libc/src/math/generic/atanhf.cpp b/libc/src/math/generic/atanhf.cpp
index 2149314d2f676..f6fde766ef785 100644
--- a/libc/src/math/generic/atanhf.cpp
+++ b/libc/src/math/generic/atanhf.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "src/math/atanhf.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
diff --git a/libc/src/math/generic/coshf.cpp b/libc/src/math/generic/coshf.cpp
index c869f7d9dec5f..9f87564d524a6 100644
--- a/libc/src/math/generic/coshf.cpp
+++ b/libc/src/math/generic/coshf.cpp
@@ -7,8 +7,8 @@
//===----------------------------------------------------------------------===//
#include "src/math/coshf.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
diff --git a/libc/src/math/generic/exp10f.cpp b/libc/src/math/generic/exp10f.cpp
index 5284c380f52ec..b2d4f097bc7ce 100644
--- a/libc/src/math/generic/exp10f.cpp
+++ b/libc/src/math/generic/exp10f.cpp
@@ -7,12 +7,11 @@
//===----------------------------------------------------------------------===//
#include "src/math/exp10f.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
-#include "src/math/generic/exp10f_impl.h"
+
+#include "src/__support/math/exp10f.h"
namespace LIBC_NAMESPACE_DECL {
-LLVM_LIBC_FUNCTION(float, exp10f, (float x)) { return generic::exp10f(x); }
+LLVM_LIBC_FUNCTION(float, exp10f, (float x)) { return math::exp10f(x); }
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/explogxf.h b/libc/src/math/generic/explogxf.h
index 5ae1457ca780e..d9f39a0d6080f 100644
--- a/libc/src/math/generic/explogxf.h
+++ b/libc/src/math/generic/explogxf.h
@@ -10,166 +10,14 @@
#define LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H
#include "common_constants.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/nearest_integer.h"
+
#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/cpu_features.h"
-
+#include "src/__support/math/exp10f_utils.h"
#include "src/__support/math/exp_utils.h"
namespace LIBC_NAMESPACE_DECL {
-struct ExpBase {
- // Base = e
- static constexpr int MID_BITS = 5;
- static constexpr int MID_MASK = (1 << MID_BITS) - 1;
- // log2(e) * 2^5
- static constexpr double LOG2_B = 0x1.71547652b82fep+0 * (1 << MID_BITS);
- // High and low parts of -log(2) * 2^(-5)
- static constexpr double M_LOGB_2_HI = -0x1.62e42fefa0000p-1 / (1 << MID_BITS);
- static constexpr double M_LOGB_2_LO =
- -0x1.cf79abc9e3b3ap-40 / (1 << MID_BITS);
- // Look up table for bit fields of 2^(i/32) for i = 0..31, generated by Sollya
- // with:
- // > for i from 0 to 31 do printdouble(round(2^(i/32), D, RN));
- static constexpr int64_t EXP_2_MID[1 << MID_BITS] = {
- 0x3ff0000000000000, 0x3ff059b0d3158574, 0x3ff0b5586cf9890f,
- 0x3ff11301d0125b51, 0x3ff172b83c7d517b, 0x3ff1d4873168b9aa,
- 0x3ff2387a6e756238, 0x3ff29e9df51fdee1, 0x3ff306fe0a31b715,
- 0x3ff371a7373aa9cb, 0x3ff3dea64c123422, 0x3ff44e086061892d,
- 0x3ff4bfdad5362a27, 0x3ff5342b569d4f82, 0x3ff5ab07dd485429,
- 0x3ff6247eb03a5585, 0x3ff6a09e667f3bcd, 0x3ff71f75e8ec5f74,
- 0x3ff7a11473eb0187, 0x3ff82589994cce13, 0x3ff8ace5422aa0db,
- 0x3ff93737b0cdc5e5, 0x3ff9c49182a3f090, 0x3ffa5503b23e255d,
- 0x3ffae89f995ad3ad, 0x3ffb7f76f2fb5e47, 0x3ffc199bdd85529c,
- 0x3ffcb720dcef9069, 0x3ffd5818dcfba487, 0x3ffdfc97337b9b5f,
- 0x3ffea4afa2a490da, 0x3fff50765b6e4540,
- };
-
- // Approximating e^dx with degree-5 minimax polynomial generated by Sollya:
- // > Q = fpminimax(expm1(x)/x, 4, [|1, D...|], [-log(2)/64, log(2)/64]);
- // Then:
- // e^dx ~ P(dx) = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[3] * dx^5.
- static constexpr double COEFFS[4] = {
- 0x1.ffffffffe5bc8p-2, 0x1.555555555cd67p-3, 0x1.5555c2a9b48b4p-5,
- 0x1.11112a0e34bdbp-7};
-
- LIBC_INLINE static double powb_lo(double dx) {
- using fputil::multiply_add;
- double dx2 = dx * dx;
- double c0 = 1.0 + dx;
- // c1 = COEFFS[0] + COEFFS[1] * dx
- double c1 = multiply_add(dx, ExpBase::COEFFS[1], ExpBase::COEFFS[0]);
- // c2 = COEFFS[2] + COEFFS[3] * dx
- double c2 = multiply_add(dx, ExpBase::COEFFS[3], ExpBase::COEFFS[2]);
- // r = c4 + c5 * dx^4
- // = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[5] * dx^7
- return fputil::polyeval(dx2, c0, c1, c2);
- }
-};
-
-struct Exp10Base : public ExpBase {
- // log2(10) * 2^5
- static constexpr double LOG2_B = 0x1.a934f0979a371p1 * (1 << MID_BITS);
- // High and low parts of -log10(2) * 2^(-5).
- // Notice that since |x * log2(10)| < 150:
- // |k| = |round(x * log2(10) * 2^5)| < 2^8 * 2^5 = 2^13
- // So when the FMA instructions are not available, in order for the product
- // k * M_LOGB_2_HI
- // to be exact, we only store the high part of log10(2) up to 38 bits
- // (= 53 - 15) of precision.
- // It is generated by Sollya with:
- // > round(log10(2), 44, RN);
- static constexpr double M_LOGB_2_HI = -0x1.34413509f8p-2 / (1 << MID_BITS);
- // > round(log10(2) - 0x1.34413509f8p-2, D, RN);
- static constexpr double M_LOGB_2_LO = 0x1.80433b83b532ap-44 / (1 << MID_BITS);
-
- // Approximating 10^dx with degree-5 minimax polynomial generated by Sollya:
- // > Q = fpminimax((10^x - 1)/x, 4, [|D...|], [-log10(2)/2^6, log10(2)/2^6]);
- // Then:
- // 10^dx ~ P(dx) = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5.
- static constexpr double COEFFS[5] = {0x1.26bb1bbb55515p1, 0x1.53524c73bd3eap1,
- 0x1.0470591dff149p1, 0x1.2bd7c0a9fbc4dp0,
- 0x1.1429e74a98f43p-1};
-
- static double powb_lo(double dx) {
- using fputil::multiply_add;
- double dx2 = dx * dx;
- // c0 = 1 + COEFFS[0] * dx
- double c0 = multiply_add(dx, Exp10Base::COEFFS[0], 1.0);
- // c1 = COEFFS[1] + COEFFS[2] * dx
- double c1 = multiply_add(dx, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]);
- // c2 = COEFFS[3] + COEFFS[4] * dx
- double c2 = multiply_add(dx, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]);
- // r = c0 + dx^2 * (c1 + c2 * dx^2)
- // = c0 + c1 * dx^2 + c2 * dx^4
- // = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5.
- return fputil::polyeval(dx2, c0, c1, c2);
- }
-};
-
-constexpr int LOG_P1_BITS = 6;
-constexpr int LOG_P1_SIZE = 1 << LOG_P1_BITS;
-
-// N[Table[Log[2, 1 + x], {x, 0/64, 63/64, 1/64}], 40]
-extern const double LOG_P1_LOG2[LOG_P1_SIZE];
-
-// N[Table[1/(1 + x), {x, 0/64, 63/64, 1/64}], 40]
-extern const double LOG_P1_1_OVER[LOG_P1_SIZE];
-
-// Taylor series expansion for Log[2, 1 + x] splitted to EVEN AND ODD numbers
-// K_LOG2_ODD starts from x^3
-extern const double K_LOG2_ODD[4];
-extern const double K_LOG2_EVEN[4];
-
-// Output of range reduction for exp_b: (2^(mid + hi), lo)
-// where:
-// b^x = 2^(mid + hi) * b^lo
-struct exp_b_reduc_t {
- double mh; // 2^(mid + hi)
- double lo;
-};
-
-// The function correctly calculates b^x value with at least float precision
-// in a limited range.
-// Range reduction:
-// b^x = 2^(hi + mid) * b^lo
-// where:
-// x = (hi + mid) * log_b(2) + lo
-// hi is an integer,
-// 0 <= mid * 2^MID_BITS < 2^MID_BITS is an integer
-// -2^(-MID_BITS - 1) <= lo * log2(b) <= 2^(-MID_BITS - 1)
-// Base class needs to provide the following constants:
-// - MID_BITS : number of bits after decimal points used for mid
-// - MID_MASK : 2^MID_BITS - 1, mask to extract mid bits
-// - LOG2_B : log2(b) * 2^MID_BITS for scaling
-// - M_LOGB_2_HI : high part of -log_b(2) * 2^(-MID_BITS)
-// - M_LOGB_2_LO : low part of -log_b(2) * 2^(-MID_BITS)
-// - EXP_2_MID : look up table for bit fields of 2^mid
-// Return:
-// { 2^(hi + mid), lo }
-template <class Base> LIBC_INLINE exp_b_reduc_t exp_b_range_reduc(float x) {
- double xd = static_cast<double>(x);
- // kd = round((hi + mid) * log2(b) * 2^MID_BITS)
- double kd = fputil::nearest_integer(Base::LOG2_B * xd);
- // k = round((hi + mid) * log2(b) * 2^MID_BITS)
- int k = static_cast<int>(kd);
- // hi = floor(kd * 2^(-MID_BITS))
- // exp_hi = shift hi to the exponent field of double precision.
- uint64_t exp_hi = static_cast<uint64_t>(k >> Base::MID_BITS)
- << fputil::FPBits<double>::FRACTION_LEN;
- // mh = 2^hi * 2^mid
- // mh_bits = bit field of mh
- uint64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi;
- double mh = fputil::FPBits<double>(mh_bits).get_val();
- // dx = lo = x - (hi + mid) * log(2)
- double dx = fputil::multiply_add(
- kd, Base::M_LOGB_2_LO, fputil::multiply_add(kd, Base::M_LOGB_2_HI, xd));
- return {mh, dx};
-}
-
// The function correctly calculates sinh(x) and cosh(x) by calculating exp(x)
// and exp(-x) simultaneously.
// To compute e^x, we perform the following range
diff --git a/libc/src/math/generic/powf.cpp b/libc/src/math/generic/powf.cpp
index dfdfd5d6d5760..a45ef511c9bad 100644
--- a/libc/src/math/generic/powf.cpp
+++ b/libc/src/math/generic/powf.cpp
@@ -9,20 +9,17 @@
#include "src/math/powf.h"
#include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
#include "src/__support/CPP/bit.h"
-#include "src/__support/CPP/optional.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/double_double.h"
-#include "src/__support/FPUtil/except_value_utils.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/nearest_integer.h"
-#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/FPUtil/sqrt.h" // Speedup for powf(x, 1/2) = sqrtf(x)
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/math/exp10f.h" // Speedup for powf(10, y) = exp10f(y)
-#include "exp10f_impl.h" // Speedup for powf(10, y) = exp10f(y)
#include "exp2f_impl.h" // Speedup for powf(2, y) = exp2f(y)
namespace LIBC_NAMESPACE_DECL {
@@ -781,7 +778,7 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
return generic::exp2f(y);
case 0x4120'0000: // x = 10.0f
// pow(10, y) = exp10(y)
- return generic::exp10f(y);
+ return math::exp10f(y);
#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}
diff --git a/libc/src/math/generic/sinhf.cpp b/libc/src/math/generic/sinhf.cpp
index d6158fd302536..63111f84de141 100644
--- a/libc/src/math/generic/sinhf.cpp
+++ b/libc/src/math/generic/sinhf.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "src/math/sinhf.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/macros/config.h"
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 8b60ca13562f6..4463d28864752 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -1999,13 +1999,13 @@ libc_support_library(
srcs = ["src/math/generic/explogxf.cpp"],
hdrs = ["src/math/generic/explogxf.h"],
deps = [
- ":__support_common",
":__support_fputil_fenv_impl",
":__support_fputil_fma",
":__support_fputil_multiply_add",
":__support_fputil_nearest_integer",
- ":__support_fputil_polyeval",
":__support_math_exp_utils",
+ ":__support_math_exp10f_utils",
+ ":__support_macros_properties_cpu_features",
":common_constants",
],
)
@@ -2050,19 +2050,6 @@ libc_support_library(
],
)
-libc_support_library(
- name = "exp10f_impl",
- hdrs = ["src/math/generic/exp10f_impl.h"],
- deps = [
- ":__support_fputil_fma",
- ":__support_fputil_multiply_add",
- ":__support_fputil_rounding_mode",
- ":__support_macros_optimization",
- ":common_constants",
- ":explogxf",
- ],
-)
-
libc_support_library(
name = "exp2f_impl",
hdrs = ["src/math/generic/exp2f_impl.h"],
@@ -2263,6 +2250,33 @@ libc_support_library(
],
)
+libc_support_library(
+ name = "__support_math_exp10f_utils",
+ hdrs = ["src/__support/math/exp10f_utils.h"],
+ deps = [
+ ":__support_fputil_basic_operations",
+ ":__support_fputil_fenv_impl",
+ ":__support_fputil_multiply_add",
+ ":__support_fputil_nearest_integer",
+ ":__support_fputil_polyeval",
+ ":__support_common",
+ ":__support_math_exp_utils",
+ ],
+)
+
+libc_support_library(
+ name = "__support_math_exp10f",
+ hdrs = ["src/__support/math/exp10f.h"],
+ deps = [
+ ":__support_math_exp10f_utils",
+ ":__support_fputil_fenv_impl",
+ ":__support_fputil_fp_bits",
+ ":__support_fputil_multiply_add",
+ ":__support_fputil_rounding_mode",
+ ":__support_macros_optimization",
+ ],
+)
+
############################### complex targets ################################
libc_function(
@@ -2726,10 +2740,10 @@ libc_math_function(
name = "cosf",
additional_deps = [
":__support_fputil_fma",
- ":__support_fputil_multiply_add",
":__support_macros_optimization",
":__support_macros_properties_cpu_features",
":sincosf_utils",
+ ":errno",
],
)
@@ -2875,7 +2889,8 @@ libc_math_function(
libc_math_function(
name = "exp10f",
additional_deps = [
- ":exp10f_impl",
+ ":__support_math_exp10f",
+ ":errno",
],
)
@@ -3724,14 +3739,13 @@ libc_math_function(
":__support_fputil_multiply_add",
":__support_fputil_nearest_integer",
":__support_fputil_polyeval",
- ":__support_fputil_rounding_mode",
":__support_fputil_sqrt",
":__support_fputil_triple_double",
":__support_macros_optimization",
+ ":__support_math_exp10f",
":common_constants",
":explogxf",
":exp2f_impl",
- ":exp10f_impl",
],
)
@@ -3840,7 +3854,6 @@ libc_math_function(
name = "sinf",
additional_deps = [
":__support_fputil_fma",
- ":__support_fputil_multiply_add",
":__support_fputil_polyeval",
":__support_fputil_rounding_mode",
":__support_macros_optimization",
More information about the llvm-commits
mailing list