[libc-commits] [libc] [libc][math] Improve the error analysis and accuracy for pow function. (PR #102098)
via libc-commits
libc-commits at lists.llvm.org
Mon Aug 5 20:47:46 PDT 2024
https://github.com/lntue created https://github.com/llvm/llvm-project/pull/102098
None
>From a85f2cb156f231a32514c61cea80740f8e318eb0 Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue.h at gmail.com>
Date: Tue, 6 Aug 2024 03:44:57 +0000
Subject: [PATCH] [libc][math] Improve the error analysis and accuracy for pow
function.
---
libc/src/math/generic/pow.cpp | 67 ++++++++++++++++++++++-----------
libc/test/src/math/pow_test.cpp | 14 +++++--
2 files changed, 54 insertions(+), 27 deletions(-)
diff --git a/libc/src/math/generic/pow.cpp b/libc/src/math/generic/pow.cpp
index d2c5f7535082c..5f906994f76fe 100644
--- a/libc/src/math/generic/pow.cpp
+++ b/libc/src/math/generic/pow.cpp
@@ -358,42 +358,63 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
// Then m_x = (1 + dx) / r, and
// log2(m_x) = log2( (1 + dx) / r )
// = log2(1 + dx) - log2(r).
- // Perform exact range reduction
+
+ // In order for the overall computations x^y = 2^(y * log2(x)) to has relative
+ // errors < 2^-52 (1ULP), we will need evaluate the exponent part y * log2(x)
+ // with absolute errors < 2^52 (or better, 2^-53). Since the whole exponent
+ // range for double precision is bounded by |y * log2(x)| < 1076 ~ 2^10, we
+ // need to evaluate log2(x) with absolute errors < 2^-53 * 2^-10 = 2^-63.
+
+ // With that requirement, we use the following degree-6 polynomial
+ // approximation:
+ // P(dx) ~ log2(1 + dx) / dx
+ // Generated by Sollya with:
+ // > P = fpminimax(log2(1 + x)/x, 6, [|D...|], [-2^-8, 2^-7]); P;
+ // > dirtyinfnorm(log2(1 + x) - x*P, [-2^-8, 2^-7]);
+ // 0x1.d03cc...p-66
+ constexpr double COEFFS[] = {0x1.71547652b82fep0, -0x1.71547652b82e7p-1,
+ 0x1.ec709dc3b1fd5p-2, -0x1.7154766124215p-2,
+ 0x1.2776bd90259d8p-2, -0x1.ec586c6f3d311p-3,
+ 0x1.9c4775eccf524p-3};
+ // Error: ulp(dx^2) <= (2^-7)^2 * 2^-52 = 2^-66
+ // Extra errors from various computations and rounding directions, the overall
+ // errors we can be bounded by 2^-65.
+
double dx;
+ DoubleDouble dx_c0;
+
+ // Perform exact range reduction and exact product dx * c0.
#ifdef LIBC_TARGET_CPU_HAS_FMA
dx = fputil::multiply_add(RD[idx_x], m_x.get_val(), -1.0); // Exact
+ dx_c0 = fputil::exact_mult(COEFFS[0], dx);
#else
double c = FPBits(m_x.uintval() & 0x3fff'e000'0000'0000).get_val();
dx = fputil::multiply_add(RD[idx_x], m_x.get_val() - c, CD[idx_x]); // Exact
+ dx_c0 = fputil::exact_mult<true>(COEFFS[0], dx);
#endif // LIBC_TARGET_CPU_HAS_FMA
- // Degree-5 polynomial approximation:
- // dx * P(dx) ~ log2(1 + dx)
- // Generated by Sollya with:
- // > P = fpminimax(log2(1 + x)/x, 5, [|D...|], [-2^-8, 2^-7]);
- // > dirtyinfnorm(log2(1 + x)/x - P, [-2^-8, 2^-7]);
- // 0x1.653...p-52
- constexpr double COEFFS[] = {0x1.71547652b82fep0, -0x1.71547652b7a07p-1,
- 0x1.ec709dc458db1p-2, -0x1.715479c2266c9p-2,
- 0x1.2776ae1ddf8fp-2, -0x1.e7b2178870157p-3};
-
- double dx2 = dx * dx; // Exact
- double c0 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]);
- double c1 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]);
- double c2 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]);
+ double dx2 = dx * dx;
+ double c0 = fputil::multiply_add(dx, COEFFS[2], COEFFS[1]);
+ double c1 = fputil::multiply_add(dx, COEFFS[4], COEFFS[3]);
+ double c2 = fputil::multiply_add(dx, COEFFS[6], COEFFS[5]);
double p = fputil::polyeval(dx2, c0, c1, c2);
// s = e_x - log2(r) + dx * P(dx)
// Absolute error bound:
- // |log2(x) - log2_x.hi - log2_x.lo| < 2^-58.
- // Relative error bound:
- // |(log2_x.hi + log2_x.lo)/log2(x) - 1| < 2^-51.
- double log2_x_hi = e_x + LOG2_R_DD[idx_x].hi; // Exact
- // Error
- double log2_x_lo = fputil::multiply_add(dx, p, LOG2_R_DD[idx_x].lo);
-
- DoubleDouble log2_x = fputil::exact_add(log2_x_hi, log2_x_lo);
+ // |log2(x) - log2_x.hi - log2_x.lo| < 2^-65.
+
+ // Notice that e_x - log2(r).hi is exact, so we perform an exact sum of
+ // e_x - log2(r).hi and the high part of the product dx * c0:
+ // log2_x_hi.hi + log2_x_hi.lo = e_x - log2(r).hi + (dx * c0).hi
+ DoubleDouble log2_x_hi =
+ fputil::exact_add(e_x + LOG2_R_DD[idx_x].hi, dx_c0.hi);
+ // The low part is dx^2 * p + low part of (dx * c0) + low part of -log2(r).
+ double log2_x_lo =
+ fputil::multiply_add(dx2, p, dx_c0.lo + LOG2_R_DD[idx_x].lo);
+ // Perform accurate sums.
+ DoubleDouble log2_x = fputil::exact_add(log2_x_hi.hi, log2_x_lo);
+ log2_x.lo += log2_x_hi.lo;
// To compute 2^(y * log2(x)), we break the exponent into 3 parts:
// y * log(2) = hi + mid + lo, where
diff --git a/libc/test/src/math/pow_test.cpp b/libc/test/src/math/pow_test.cpp
index 468a6bbee4b2c..20e3ddfc8fbe5 100644
--- a/libc/test/src/math/pow_test.cpp
+++ b/libc/test/src/math/pow_test.cpp
@@ -18,15 +18,21 @@ namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
TEST_F(LlvmLibcPowTest, TrickyInputs) {
constexpr mpfr::BinaryInput<double> INPUTS[] = {
- {0x1.0853408534085p-2, 0x1.0D148E03BCBA8p-1},
- {0x1.65FBD65FBD657p-1, 0x1.F10D148E03BB6p+1},
+ {0x1.0853408534085p-2, 0x1.0d148e03bcba8p-1},
+ {0x1.65fbd65fbd657p-1, 0x1.f10d148e03bb6p+1},
+ {0x1.c046a084d2e12p-1, 0x1.1f9p+12},
+ {0x1.ae37ed1670326p-1, 0x1.f967df66a202p-1},
+ {0x1.ffffffffffffcp-1, 0x1.fffffffffffffp-2},
+ {0x1.f558a88a8aadep-1, 0x1.88ap+12},
+ {0x1.e84d32731e593p-1, 0x1.2cb8p+13},
+ {0x1.ffffffffffffcp-1, 0x1.fffffffffffffp-2},
};
for (auto input : INPUTS) {
double x = input.x;
double y = input.y;
- EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input,
- LIBC_NAMESPACE::pow(x, y), 0.5);
+ EXPECT_MPFR_MATCH(mpfr::Operation::Pow, input, LIBC_NAMESPACE::pow(x, y),
+ 1.5);
}
}
More information about the libc-commits
mailing list