[libc-commits] [libc] [libc][NFC] Integrate `quick_mul`, `quick_add`, `multiply_add`, `pow_n` and `mul_pow_2` as `friend` functions in `DyadicFloat` (PR #89373)
Guillaume Chatelet via libc-commits
libc-commits at lists.llvm.org
Fri Apr 19 04:32:42 PDT 2024
https://github.com/gchatelet created https://github.com/llvm/llvm-project/pull/89373
None
>From af4eab1355172fa5e4e9ae450cb6e198345edb64 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Fri, 19 Apr 2024 11:32:22 +0000
Subject: [PATCH] [libc][NFC] Integrate `quick_mul`, `quick_add`,
`multiply_add`, `pow_n` and `mul_pow_2` as `friend` functions in
`DyadicFloat`
---
libc/src/__support/FPUtil/dyadic_float.h | 221 ++++++++++----------
libc/src/__support/float_to_string.h | 8 +-
libc/src/math/generic/exp.cpp | 21 +-
libc/src/math/generic/exp10.cpp | 21 +-
libc/src/math/generic/exp2.cpp | 18 +-
libc/src/math/generic/expm1.cpp | 24 +--
libc/src/math/generic/log.cpp | 16 +-
libc/src/math/generic/log10.cpp | 14 +-
libc/src/math/generic/log1p.cpp | 26 +--
libc/src/math/generic/log2.cpp | 12 +-
libc/src/math/generic/log_range_reduction.h | 6 +-
11 files changed, 184 insertions(+), 203 deletions(-)
diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
index 12a69228d36c7b..a40e9feb6c82fb 100644
--- a/libc/src/__support/FPUtil/dyadic_float.h
+++ b/libc/src/__support/FPUtil/dyadic_float.h
@@ -200,129 +200,122 @@ template <size_t Bits> struct DyadicFloat {
return new_mant;
}
-};
-// Quick add - Add 2 dyadic floats with rounding toward 0 and then normalize the
-// output:
-// - Align the exponents so that:
-// new a.exponent = new b.exponent = max(a.exponent, b.exponent)
-// - Add or subtract the mantissas depending on the signs.
-// - Normalize the result.
-// The absolute errors compared to the mathematical sum is bounded by:
-// | quick_add(a, b) - (a + b) | < MSB(a + b) * 2^(-Bits + 2),
-// i.e., errors are up to 2 ULPs.
-// Assume inputs are normalized (by constructors or other functions) so that we
-// don't need to normalize the inputs again in this function. If the inputs are
-// not normalized, the results might lose precision significantly.
-template <size_t Bits>
-LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
- DyadicFloat<Bits> b) {
- if (LIBC_UNLIKELY(a.mantissa.is_zero()))
- return b;
- if (LIBC_UNLIKELY(b.mantissa.is_zero()))
- return a;
-
- // Align exponents
- if (a.exponent > b.exponent)
- b.shift_right(a.exponent - b.exponent);
- else if (b.exponent > a.exponent)
- a.shift_right(b.exponent - a.exponent);
-
- DyadicFloat<Bits> result;
-
- if (a.sign == b.sign) {
- // Addition
- result.sign = a.sign;
- result.exponent = a.exponent;
- result.mantissa = a.mantissa;
- if (result.mantissa.add_overflow(b.mantissa)) {
- // Mantissa addition overflow.
- result.shift_right(1);
- result.mantissa.val[DyadicFloat<Bits>::MantissaType::WORD_COUNT - 1] |=
- (uint64_t(1) << 63);
+ // Quick add - Add 2 dyadic floats with rounding toward 0 and then normalize
+ // the output:
+ // - Align the exponents so that:
+ // new a.exponent = new b.exponent = max(a.exponent, b.exponent)
+ // - Add or subtract the mantissas depending on the signs.
+ // - Normalize the result.
+ // The absolute errors compared to the mathematical sum is bounded by:
+ // | quick_add(a, b) - (a + b) | < MSB(a + b) * 2^(-Bits + 2),
+ // i.e., errors are up to 2 ULPs.
+ // Assume inputs are normalized (by constructors or other functions) so that
+ // we don't need to normalize the inputs again in this function. If the
+ // inputs are not normalized, the results might lose precision significantly.
+ LIBC_INLINE friend constexpr DyadicFloat quick_add(DyadicFloat a,
+ DyadicFloat b) {
+ if (LIBC_UNLIKELY(a.mantissa.is_zero()))
+ return b;
+ if (LIBC_UNLIKELY(b.mantissa.is_zero()))
+ return a;
+
+ // Align exponents
+ if (a.exponent > b.exponent)
+ b.shift_right(a.exponent - b.exponent);
+ else if (b.exponent > a.exponent)
+ a.shift_right(b.exponent - a.exponent);
+
+ DyadicFloat result;
+
+ if (a.sign == b.sign) {
+ // Addition
+ result.sign = a.sign;
+ result.exponent = a.exponent;
+ result.mantissa = a.mantissa;
+ if (result.mantissa.add_overflow(b.mantissa)) {
+ // Mantissa addition overflow.
+ result.shift_right(1);
+ result.mantissa.val[DyadicFloat::MantissaType::WORD_COUNT - 1] |=
+ (uint64_t(1) << 63);
+ }
+ // Result is already normalized.
+ return result;
}
- // Result is already normalized.
- return result;
+
+ // Subtraction
+ if (a.mantissa >= b.mantissa) {
+ result.sign = a.sign;
+ result.exponent = a.exponent;
+ result.mantissa = a.mantissa - b.mantissa;
+ } else {
+ result.sign = b.sign;
+ result.exponent = b.exponent;
+ result.mantissa = b.mantissa - a.mantissa;
+ }
+
+ return result.normalize();
}
- // Subtraction
- if (a.mantissa >= b.mantissa) {
- result.sign = a.sign;
- result.exponent = a.exponent;
- result.mantissa = a.mantissa - b.mantissa;
- } else {
- result.sign = b.sign;
- result.exponent = b.exponent;
- result.mantissa = b.mantissa - a.mantissa;
+ // Quick Mul - Slightly less accurate but efficient multiplication of 2 dyadic
+ // floats with rounding toward 0 and then normalize the output:
+ // result.exponent = a.exponent + b.exponent + Bits,
+ // result.mantissa = quick_mul_hi(a.mantissa + b.mantissa)
+ // ~ (full product a.mantissa * b.mantissa) >> Bits.
+ // The errors compared to the mathematical product is bounded by:
+ // 2 * errors of quick_mul_hi = 2 * (UInt<Bits>::WORD_COUNT - 1) in ULPs.
+ // Assume inputs are normalized (by constructors or other functions) so that
+ // we don't need to normalize the inputs again in this function. If the
+ // inputs are not normalized, the results might lose precision significantly.
+ LIBC_INLINE friend constexpr DyadicFloat quick_mul(DyadicFloat a,
+ DyadicFloat b) {
+ DyadicFloat result;
+ result.sign = (a.sign != b.sign) ? Sign::NEG : Sign::POS;
+ result.exponent = a.exponent + b.exponent + int(Bits);
+
+ if (!(a.mantissa.is_zero() || b.mantissa.is_zero())) {
+ result.mantissa = a.mantissa.quick_mul_hi(b.mantissa);
+ // Check the leading bit directly, should be faster than using clz in
+ // normalize().
+ if (result.mantissa.val[DyadicFloat::MantissaType::WORD_COUNT - 1] >>
+ 63 ==
+ 0)
+ result.shift_left(1);
+ } else {
+ result.mantissa = (typename DyadicFloat::MantissaType)(0);
+ }
+ return result;
}
- return result.normalize();
-}
-
-// Quick Mul - Slightly less accurate but efficient multiplication of 2 dyadic
-// floats with rounding toward 0 and then normalize the output:
-// result.exponent = a.exponent + b.exponent + Bits,
-// result.mantissa = quick_mul_hi(a.mantissa + b.mantissa)
-// ~ (full product a.mantissa * b.mantissa) >> Bits.
-// The errors compared to the mathematical product is bounded by:
-// 2 * errors of quick_mul_hi = 2 * (UInt<Bits>::WORD_COUNT - 1) in ULPs.
-// Assume inputs are normalized (by constructors or other functions) so that we
-// don't need to normalize the inputs again in this function. If the inputs are
-// not normalized, the results might lose precision significantly.
-template <size_t Bits>
-LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(DyadicFloat<Bits> a,
- DyadicFloat<Bits> b) {
- DyadicFloat<Bits> result;
- result.sign = (a.sign != b.sign) ? Sign::NEG : Sign::POS;
- result.exponent = a.exponent + b.exponent + int(Bits);
-
- if (!(a.mantissa.is_zero() || b.mantissa.is_zero())) {
- result.mantissa = a.mantissa.quick_mul_hi(b.mantissa);
- // Check the leading bit directly, should be faster than using clz in
- // normalize().
- if (result.mantissa.val[DyadicFloat<Bits>::MantissaType::WORD_COUNT - 1] >>
- 63 ==
- 0)
- result.shift_left(1);
- } else {
- result.mantissa = (typename DyadicFloat<Bits>::MantissaType)(0);
+ // Simple polynomial approximation.
+ LIBC_INLINE friend constexpr DyadicFloat multiply_add(const DyadicFloat &a,
+ const DyadicFloat &b,
+ const DyadicFloat &c) {
+ return quick_add(c, quick_mul(a, b));
}
- return result;
-}
-
-// Simple polynomial approximation.
-template <size_t Bits>
-LIBC_INLINE constexpr DyadicFloat<Bits>
-multiply_add(const DyadicFloat<Bits> &a, const DyadicFloat<Bits> &b,
- const DyadicFloat<Bits> &c) {
- return quick_add(c, quick_mul(a, b));
-}
-
-// Simple exponentiation implementation for printf. Only handles positive
-// exponents, since division isn't implemented.
-template <size_t Bits>
-LIBC_INLINE constexpr DyadicFloat<Bits> pow_n(DyadicFloat<Bits> a,
- uint32_t power) {
- DyadicFloat<Bits> result = 1.0;
- DyadicFloat<Bits> cur_power = a;
-
- while (power > 0) {
- if ((power % 2) > 0) {
- result = quick_mul(result, cur_power);
+
+ // Simple exponentiation implementation for printf. Only handles positive
+ // exponents, since division isn't implemented.
+ LIBC_INLINE constexpr DyadicFloat pow_n(uint32_t power) const {
+ DyadicFloat result = 1.0;
+ DyadicFloat cur_power = *this;
+
+ while (power > 0) {
+ if ((power % 2) > 0) {
+ result = quick_mul(result, cur_power);
+ }
+ power = power >> 1;
+ cur_power = quick_mul(cur_power, cur_power);
}
- power = power >> 1;
- cur_power = quick_mul(cur_power, cur_power);
+ return result;
}
- return result;
-}
-
-template <size_t Bits>
-LIBC_INLINE constexpr DyadicFloat<Bits> mul_pow_2(DyadicFloat<Bits> a,
- int32_t pow_2) {
- DyadicFloat<Bits> result = a;
- result.exponent += pow_2;
- return result;
-}
+
+ LIBC_INLINE constexpr DyadicFloat mul_pow_2(int32_t pow_2) const {
+ DyadicFloat result = *this;
+ result.exponent += pow_2;
+ return result;
+ }
+};
} // namespace LIBC_NAMESPACE::fputil
diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h
index 09b13324f25bbd..94504838f04613 100644
--- a/libc/src/__support/float_to_string.h
+++ b/libc/src/__support/float_to_string.h
@@ -244,10 +244,10 @@ LIBC_INLINE UInt<MID_INT_SIZE> get_table_positive_df(int exponent, size_t i) {
false, -276, FIVE_EXP_MINUS_NINE_MANT);
if (i > 0) {
- fputil::DyadicFloat<INT_SIZE> fives = fputil::pow_n(FIVE_EXP_MINUS_NINE, i);
+ fputil::DyadicFloat<INT_SIZE> fives = FIVE_EXP_MINUS_NINE.pow_n(i);
num = fives;
}
- num = mul_pow_2(num, shift_amount);
+ num = num.mul_pow_2(shift_amount);
// Adding one is part of the formula.
UInt<INT_SIZE> int_num = static_cast<UInt<INT_SIZE>>(num) + 1;
@@ -349,10 +349,10 @@ LIBC_INLINE UInt<MID_INT_SIZE> get_table_negative_df(int exponent, size_t i) {
TEN_EXP_NINE_MANT);
if (i > 0) {
- fputil::DyadicFloat<INT_SIZE> tens = fputil::pow_n(TEN_EXP_NINE, i);
+ fputil::DyadicFloat<INT_SIZE> tens = TEN_EXP_NINE.pow_n(i);
num = tens;
}
- num = mul_pow_2(num, shift_amount);
+ num = num.mul_pow_2(shift_amount);
UInt<INT_SIZE> int_num = static_cast<UInt<INT_SIZE>>(num);
if (int_num > MOD_SIZE) {
diff --git a/libc/src/math/generic/exp.cpp b/libc/src/math/generic/exp.cpp
index 3d060bcbd3be30..24ad81fcbea405 100644
--- a/libc/src/math/generic/exp.cpp
+++ b/libc/src/math/generic/exp.cpp
@@ -126,25 +126,22 @@ Float128 exp_f128(double x, double kd, int idx1, int idx2) {
double t2 = kd * MLOG_2_EXP2_M12_MID_30; // exact
double t3 = kd * MLOG_2_EXP2_M12_LO; // Error < 2^-133
- Float128 dx = fputil::quick_add(
- Float128(t1), fputil::quick_add(Float128(t2), Float128(t3)));
+ Float128 dx = quick_add(Float128(t1), quick_add(Float128(t2), Float128(t3)));
// TODO: Skip recalculating exp_mid1 and exp_mid2.
- Float128 exp_mid1 =
- fputil::quick_add(Float128(EXP2_MID1[idx1].hi),
- fputil::quick_add(Float128(EXP2_MID1[idx1].mid),
- Float128(EXP2_MID1[idx1].lo)));
+ Float128 exp_mid1 = quick_add(
+ Float128(EXP2_MID1[idx1].hi),
+ quick_add(Float128(EXP2_MID1[idx1].mid), Float128(EXP2_MID1[idx1].lo)));
- Float128 exp_mid2 =
- fputil::quick_add(Float128(EXP2_MID2[idx2].hi),
- fputil::quick_add(Float128(EXP2_MID2[idx2].mid),
- Float128(EXP2_MID2[idx2].lo)));
+ Float128 exp_mid2 = quick_add(
+ Float128(EXP2_MID2[idx2].hi),
+ quick_add(Float128(EXP2_MID2[idx2].mid), Float128(EXP2_MID2[idx2].lo)));
- Float128 exp_mid = fputil::quick_mul(exp_mid1, exp_mid2);
+ Float128 exp_mid = quick_mul(exp_mid1, exp_mid2);
Float128 p = poly_approx_f128(dx);
- Float128 r = fputil::quick_mul(exp_mid, p);
+ Float128 r = quick_mul(exp_mid, p);
r.exponent += static_cast<int>(kd) >> 12;
diff --git a/libc/src/math/generic/exp10.cpp b/libc/src/math/generic/exp10.cpp
index a4ae41407112bd..4dddd50ada6b22 100644
--- a/libc/src/math/generic/exp10.cpp
+++ b/libc/src/math/generic/exp10.cpp
@@ -126,25 +126,22 @@ Float128 exp10_f128(double x, double kd, int idx1, int idx2) {
double t2 = kd * MLOG10_2_EXP2_M12_MID_32; // exact
double t3 = kd * MLOG10_2_EXP2_M12_LO; // Error < 2^-144
- Float128 dx = fputil::quick_add(
- Float128(t1), fputil::quick_add(Float128(t2), Float128(t3)));
+ Float128 dx = quick_add(Float128(t1), quick_add(Float128(t2), Float128(t3)));
// TODO: Skip recalculating exp_mid1 and exp_mid2.
- Float128 exp_mid1 =
- fputil::quick_add(Float128(EXP2_MID1[idx1].hi),
- fputil::quick_add(Float128(EXP2_MID1[idx1].mid),
- Float128(EXP2_MID1[idx1].lo)));
+ Float128 exp_mid1 = quick_add(
+ Float128(EXP2_MID1[idx1].hi),
+ quick_add(Float128(EXP2_MID1[idx1].mid), Float128(EXP2_MID1[idx1].lo)));
- Float128 exp_mid2 =
- fputil::quick_add(Float128(EXP2_MID2[idx2].hi),
- fputil::quick_add(Float128(EXP2_MID2[idx2].mid),
- Float128(EXP2_MID2[idx2].lo)));
+ Float128 exp_mid2 = quick_add(
+ Float128(EXP2_MID2[idx2].hi),
+ quick_add(Float128(EXP2_MID2[idx2].mid), Float128(EXP2_MID2[idx2].lo)));
- Float128 exp_mid = fputil::quick_mul(exp_mid1, exp_mid2);
+ Float128 exp_mid = quick_mul(exp_mid1, exp_mid2);
Float128 p = poly_approx_f128(dx);
- Float128 r = fputil::quick_mul(exp_mid, p);
+ Float128 r = quick_mul(exp_mid, p);
r.exponent += static_cast<int>(kd) >> 12;
diff --git a/libc/src/math/generic/exp2.cpp b/libc/src/math/generic/exp2.cpp
index 1a2fa3feb83e54..48ebb41a328be6 100644
--- a/libc/src/math/generic/exp2.cpp
+++ b/libc/src/math/generic/exp2.cpp
@@ -114,21 +114,19 @@ Float128 exp2_f128(double x, int hi, int idx1, int idx2) {
Float128 dx = Float128(x);
// TODO: Skip recalculating exp_mid1 and exp_mid2.
- Float128 exp_mid1 =
- fputil::quick_add(Float128(EXP2_MID1[idx1].hi),
- fputil::quick_add(Float128(EXP2_MID1[idx1].mid),
- Float128(EXP2_MID1[idx1].lo)));
+ Float128 exp_mid1 = quick_add(
+ Float128(EXP2_MID1[idx1].hi),
+ quick_add(Float128(EXP2_MID1[idx1].mid), Float128(EXP2_MID1[idx1].lo)));
- Float128 exp_mid2 =
- fputil::quick_add(Float128(EXP2_MID2[idx2].hi),
- fputil::quick_add(Float128(EXP2_MID2[idx2].mid),
- Float128(EXP2_MID2[idx2].lo)));
+ Float128 exp_mid2 = quick_add(
+ Float128(EXP2_MID2[idx2].hi),
+ quick_add(Float128(EXP2_MID2[idx2].mid), Float128(EXP2_MID2[idx2].lo)));
- Float128 exp_mid = fputil::quick_mul(exp_mid1, exp_mid2);
+ Float128 exp_mid = quick_mul(exp_mid1, exp_mid2);
Float128 p = poly_approx_f128(dx);
- Float128 r = fputil::quick_mul(exp_mid, p);
+ Float128 r = quick_mul(exp_mid, p);
r.exponent += hi;
diff --git a/libc/src/math/generic/expm1.cpp b/libc/src/math/generic/expm1.cpp
index 574c4b9aaf39f7..f1a2a70697b6c5 100644
--- a/libc/src/math/generic/expm1.cpp
+++ b/libc/src/math/generic/expm1.cpp
@@ -148,34 +148,30 @@ Float128 expm1_f128(double x, double kd, int idx1, int idx2) {
double t2 = kd * MLOG_2_EXP2_M12_MID_30; // exact
double t3 = kd * MLOG_2_EXP2_M12_LO; // Error < 2^-133
- Float128 dx = fputil::quick_add(
- Float128(t1), fputil::quick_add(Float128(t2), Float128(t3)));
+ Float128 dx = quick_add(Float128(t1), quick_add(Float128(t2), Float128(t3)));
// TODO: Skip recalculating exp_mid1 and exp_mid2.
- Float128 exp_mid1 =
- fputil::quick_add(Float128(EXP2_MID1[idx1].hi),
- fputil::quick_add(Float128(EXP2_MID1[idx1].mid),
- Float128(EXP2_MID1[idx1].lo)));
+ Float128 exp_mid1 = quick_add(
+ Float128(EXP2_MID1[idx1].hi),
+ quick_add(Float128(EXP2_MID1[idx1].mid), Float128(EXP2_MID1[idx1].lo)));
- Float128 exp_mid2 =
- fputil::quick_add(Float128(EXP2_MID2[idx2].hi),
- fputil::quick_add(Float128(EXP2_MID2[idx2].mid),
- Float128(EXP2_MID2[idx2].lo)));
+ Float128 exp_mid2 = quick_add(
+ Float128(EXP2_MID2[idx2].hi),
+ quick_add(Float128(EXP2_MID2[idx2].mid), Float128(EXP2_MID2[idx2].lo)));
- Float128 exp_mid = fputil::quick_mul(exp_mid1, exp_mid2);
+ Float128 exp_mid = quick_mul(exp_mid1, exp_mid2);
int hi = static_cast<int>(kd) >> 12;
Float128 minus_one{Sign::NEG, -127 - hi,
0x80000000'00000000'00000000'00000000_u128};
- Float128 exp_mid_m1 = fputil::quick_add(exp_mid, minus_one);
+ Float128 exp_mid_m1 = quick_add(exp_mid, minus_one);
Float128 p = poly_approx_f128(dx);
// r = exp_mid * (1 + dx * P) - 1
// = (exp_mid - 1) + (dx * exp_mid) * P
- Float128 r =
- fputil::multiply_add(fputil::quick_mul(exp_mid, dx), p, exp_mid_m1);
+ Float128 r = multiply_add(quick_mul(exp_mid, dx), p, exp_mid_m1);
r.exponent += hi;
diff --git a/libc/src/math/generic/log.cpp b/libc/src/math/generic/log.cpp
index 6de0d90be80e11..3674fcdfd94cf2 100644
--- a/libc/src/math/generic/log.cpp
+++ b/libc/src/math/generic/log.cpp
@@ -714,19 +714,19 @@ constexpr Float128 BIG_COEFFS[3]{
double log_accurate(int e_x, int index, double m_x) {
Float128 e_x_f128(static_cast<float>(e_x));
- Float128 sum = fputil::quick_mul(LOG_2, e_x_f128);
- sum = fputil::quick_add(sum, LOG_TABLE.step_1[index]);
+ Float128 sum = quick_mul(LOG_2, e_x_f128);
+ sum = quick_add(sum, LOG_TABLE.step_1[index]);
Float128 v_f128 = log_range_reduction(m_x, LOG_TABLE, sum);
- sum = fputil::quick_add(sum, v_f128);
+ sum = quick_add(sum, v_f128);
// Polynomial approximation
- Float128 p = fputil::quick_mul(v_f128, BIG_COEFFS[0]);
- p = fputil::quick_mul(v_f128, fputil::quick_add(p, BIG_COEFFS[1]));
- p = fputil::quick_mul(v_f128, fputil::quick_add(p, BIG_COEFFS[2]));
- p = fputil::quick_mul(v_f128, p);
+ Float128 p = quick_mul(v_f128, BIG_COEFFS[0]);
+ p = quick_mul(v_f128, quick_add(p, BIG_COEFFS[1]));
+ p = quick_mul(v_f128, quick_add(p, BIG_COEFFS[2]));
+ p = quick_mul(v_f128, p);
- Float128 r = fputil::quick_add(sum, p);
+ Float128 r = quick_add(sum, p);
return static_cast<double>(r);
}
diff --git a/libc/src/math/generic/log10.cpp b/libc/src/math/generic/log10.cpp
index fb839c111e6a0f..515faab8d5e668 100644
--- a/libc/src/math/generic/log10.cpp
+++ b/libc/src/math/generic/log10.cpp
@@ -717,18 +717,18 @@ constexpr Float128 BIG_COEFFS[4]{
double log10_accurate(int e_x, int index, double m_x) {
Float128 e_x_f128(static_cast<float>(e_x));
- Float128 sum = fputil::quick_mul(LOG10_2, e_x_f128);
- sum = fputil::quick_add(sum, LOG10_TABLE.step_1[index]);
+ Float128 sum = quick_mul(LOG10_2, e_x_f128);
+ sum = quick_add(sum, LOG10_TABLE.step_1[index]);
Float128 v_f128 = log_range_reduction(m_x, LOG10_TABLE, sum);
// Polynomial approximation
- Float128 p = fputil::quick_mul(v_f128, BIG_COEFFS[0]);
- p = fputil::quick_mul(v_f128, fputil::quick_add(p, BIG_COEFFS[1]));
- p = fputil::quick_mul(v_f128, fputil::quick_add(p, BIG_COEFFS[2]));
- p = fputil::quick_mul(v_f128, fputil::quick_add(p, BIG_COEFFS[3]));
+ Float128 p = quick_mul(v_f128, BIG_COEFFS[0]);
+ p = quick_mul(v_f128, quick_add(p, BIG_COEFFS[1]));
+ p = quick_mul(v_f128, quick_add(p, BIG_COEFFS[2]));
+ p = quick_mul(v_f128, quick_add(p, BIG_COEFFS[3]));
- Float128 r = fputil::quick_add(sum, p);
+ Float128 r = quick_add(sum, p);
return static_cast<double>(r);
}
diff --git a/libc/src/math/generic/log1p.cpp b/libc/src/math/generic/log1p.cpp
index 2b187080a057b4..ce33d9ed035f76 100644
--- a/libc/src/math/generic/log1p.cpp
+++ b/libc/src/math/generic/log1p.cpp
@@ -824,11 +824,11 @@ constexpr Float128 BIG_COEFFS[4]{
LIBC_INLINE double log1p_accurate(int e_x, int index,
fputil::DoubleDouble m_x) {
Float128 e_x_f128(static_cast<float>(e_x));
- Float128 sum = fputil::quick_mul(LOG_2, e_x_f128);
- sum = fputil::quick_add(sum, LOG_R1[index]);
+ Float128 sum = quick_mul(LOG_2, e_x_f128);
+ sum = quick_add(sum, LOG_R1[index]);
// fputil::DoubleDouble v4;
- Float128 v = fputil::quick_add(Float128(m_x.hi), Float128(m_x.lo));
+ Float128 v = quick_add(Float128(m_x.hi), Float128(m_x.lo));
// Skip 2nd range reduction step if |m_x| <= 2^-15.
if (m_x.hi > 0x1p-15 || m_x.hi < -0x1p-15) {
@@ -841,9 +841,9 @@ LIBC_INLINE double log1p_accurate(int e_x, int index,
// Output range:
// -0x1.1037c00000040271p-15 <= v2.hi + v2.lo <= 0x1.108480000008096cp-15
int idx2 = static_cast<int>(0x1p14 * (m_x.hi + (91 * 0x1p-14 + 0x1p-15)));
- sum = fputil::quick_add(sum, LOG_R2[idx2]);
+ sum = quick_add(sum, LOG_R2[idx2]);
Float128 s2 = Float128(S2[idx2]);
- v = fputil::quick_add(fputil::quick_add(v, s2), fputil::quick_mul(v, s2));
+ v = quick_add(quick_add(v, s2), quick_mul(v, s2));
}
// Skip 3rd range reduction step if |v| <= 2^-22.
@@ -857,19 +857,19 @@ LIBC_INLINE double log1p_accurate(int e_x, int index,
// -0x1.012bb800000800114p-22 <= v3.hi + v3.lo <= 0x1p-22
int idx3 =
static_cast<int>(0x1p21 * (double(v) + (69 * 0x1p-21 + 0x1p-22)));
- sum = fputil::quick_add(sum, LOG_R3[idx3]);
+ sum = quick_add(sum, LOG_R3[idx3]);
Float128 s3 = Float128(S3[idx3]);
- v = fputil::quick_add(fputil::quick_add(v, s3), fputil::quick_mul(v, s3));
+ v = quick_add(quick_add(v, s3), quick_mul(v, s3));
}
// Polynomial approximation
- Float128 p = fputil::quick_mul(v, BIG_COEFFS[0]);
- p = fputil::quick_mul(v, fputil::quick_add(p, BIG_COEFFS[1]));
- p = fputil::quick_mul(v, fputil::quick_add(p, BIG_COEFFS[2]));
- p = fputil::quick_mul(v, fputil::quick_add(p, BIG_COEFFS[3]));
- p = fputil::quick_add(v, fputil::quick_mul(v, p));
+ Float128 p = quick_mul(v, BIG_COEFFS[0]);
+ p = quick_mul(v, quick_add(p, BIG_COEFFS[1]));
+ p = quick_mul(v, quick_add(p, BIG_COEFFS[2]));
+ p = quick_mul(v, quick_add(p, BIG_COEFFS[3]));
+ p = quick_add(v, quick_mul(v, p));
- Float128 r = fputil::quick_add(sum, p);
+ Float128 r = quick_add(sum, p);
return static_cast<double>(r);
}
diff --git a/libc/src/math/generic/log2.cpp b/libc/src/math/generic/log2.cpp
index c68bc60e8468bb..37f354318d3fca 100644
--- a/libc/src/math/generic/log2.cpp
+++ b/libc/src/math/generic/log2.cpp
@@ -838,17 +838,17 @@ constexpr Float128 BIG_COEFFS[4]{
double log2_accurate(int e_x, int index, double m_x) {
Float128 sum(static_cast<float>(e_x));
- sum = fputil::quick_add(sum, LOG2_TABLE.step_1[index]);
+ sum = quick_add(sum, LOG2_TABLE.step_1[index]);
Float128 v_f128 = log_range_reduction(m_x, LOG2_TABLE, sum);
// Polynomial approximation
- Float128 p = fputil::quick_mul(v_f128, BIG_COEFFS[0]);
- p = fputil::quick_mul(v_f128, fputil::quick_add(p, BIG_COEFFS[1]));
- p = fputil::quick_mul(v_f128, fputil::quick_add(p, BIG_COEFFS[2]));
- p = fputil::quick_mul(v_f128, fputil::quick_add(p, BIG_COEFFS[3]));
+ Float128 p = quick_mul(v_f128, BIG_COEFFS[0]);
+ p = quick_mul(v_f128, quick_add(p, BIG_COEFFS[1]));
+ p = quick_mul(v_f128, quick_add(p, BIG_COEFFS[2]));
+ p = quick_mul(v_f128, quick_add(p, BIG_COEFFS[3]));
- Float128 r = fputil::quick_add(sum, p);
+ Float128 r = quick_add(sum, p);
return static_cast<double>(r);
}
diff --git a/libc/src/math/generic/log_range_reduction.h b/libc/src/math/generic/log_range_reduction.h
index d12da47a2cfae7..9eddb5be21318f 100644
--- a/libc/src/math/generic/log_range_reduction.h
+++ b/libc/src/math/generic/log_range_reduction.h
@@ -44,7 +44,7 @@ log_range_reduction(double m_x, const LogRR &log_table,
// Output range: vv2 in [-0x1.3ffcp-15, 0x1.3e3dp-15].
// idx2 = trunc(2^14 * (v + 2^-8 + 2^-15))
size_t idx2 = static_cast<size_t>((v + 0x10'2000'0000'0000) >> 46);
- sum = fputil::quick_add(sum, log_table.step_2[idx2]);
+ sum = quick_add(sum, log_table.step_2[idx2]);
int64_t s2 = static_cast<int64_t>(S2[idx2]); // |s| <= 2^-7, ulp = 2^-16
int64_t sv2 = s2 * v; // |s*v| < 2^-14, ulp = 2^(-60-16) = 2^-76
@@ -55,7 +55,7 @@ log_range_reduction(double m_x, const LogRR &log_table,
// Output range: vv3 in [-0x1.01928p-22 , 0x1p-22]
// idx3 = trunc(2^21 * (v + 80*2^-21 + 2^-22))
size_t idx3 = static_cast<size_t>((vv2 + 0x2840'0000'0000'0000) >> 55);
- sum = fputil::quick_add(sum, log_table.step_3[idx3]);
+ sum = quick_add(sum, log_table.step_3[idx3]);
int64_t s3 = static_cast<int64_t>(S3[idx3]); // |s| < 2^-13, ulp = 2^-21
int64_t spv3 = (s3 << 55) + vv2; // |s + v| < 2^-21, ulp = 2^-76
@@ -69,7 +69,7 @@ log_range_reduction(double m_x, const LogRR &log_table,
// idx4 = trunc(2^21 * (v + 65*2^-28 + 2^-29))
size_t idx4 = static_cast<size_t>((static_cast<int>(vv3 >> 68) + 131) >> 1);
- sum = fputil::quick_add(sum, log_table.step_4[idx4]);
+ sum = quick_add(sum, log_table.step_4[idx4]);
Int128 s4 = static_cast<Int128>(S4[idx4]); // |s| < 2^-21, ulp = 2^-28
// |s + v| < 2^-28, ulp = 2^-97
More information about the libc-commits
mailing list