[libc-commits] [libc] [libc][math] Fix overflow shifts for dyadic floats and add skip accuracy option for expm1. (PR #98048)
via libc-commits
libc-commits at lists.llvm.org
Mon Jul 8 09:53:46 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
Author: None (lntue)
<details>
<summary>Changes</summary>
The following errors were reported by Paul Zimmerman:
```
expm1
Running worst cases check in --rndn mode...
FAIL x=0x1.c3b93d3b25c91p+8 ref=0x1.9fb21849affd1p+651 z=0x1.9f921849affd1p+651
Running worst cases check in --rndz mode...
FAIL x=0x1.d6479eba7c971p+8 ref=0x1.62a88613629b5p+678 z=0x1.62a886135e9b5p+678
Running worst cases check in --rndu mode...
FAIL x=0x1.1f0da93354198p+7 ref=0x1.0bd73b73fc74dp+207 z=0x1.0bd73b73fc74cp+207
Running worst cases check in --rndd mode...
FAIL x=0x1.93b3037166891p+8 ref=0x1.554f3a5c4535cp+582 z=0x1.554f3a5c4535bp+582
```
```
tan
Running worst cases check in --rndn mode...
FAIL x=0x1.00452f0e0134dp-13 ref=0x1.00452f2367da9p-13 z=0x1.00452f2367daap-13
Running worst cases check in --rndz mode...
FAIL x=0x1.89f0f5241255bp-2 ref=0x1.9e997dd861117p-2 z=0x1.9e997dd861118p-2
FAIL x=0x1.045457ae3994p+5 ref=0x1.0c06941768ef3p+1 z=0x1.0c06941768ef4p+1
```
---
Full diff: https://github.com/llvm/llvm-project/pull/98048.diff
4 Files Affected:
- (modified) libc/src/__support/FPUtil/dyadic_float.h (+13-4)
- (modified) libc/src/math/generic/expm1.cpp (+21-10)
- (modified) libc/test/src/math/expm1_test.cpp (+17-28)
- (modified) libc/test/src/math/tan_test.cpp (+10-8)
``````````diff
diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
index 8d44a98a693f8..79fb9c362ed69 100644
--- a/libc/src/__support/FPUtil/dyadic_float.h
+++ b/libc/src/__support/FPUtil/dyadic_float.h
@@ -260,10 +260,19 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
return a;
// Align exponents
- if (a.exponent > b.exponent)
- b.shift_right(a.exponent - b.exponent);
- else if (b.exponent > a.exponent)
- a.shift_right(b.exponent - a.exponent);
+ if (a.exponent > b.exponent) {
+ size_t shift = static_cast<size_t>(a.exponent - b.exponent);
+ if (shift < Bits)
+ b.shift_right(static_cast<int>(shift));
+ else
+ b = DyadicFloat<Bits>();
+ } else if (b.exponent > a.exponent) {
+ size_t shift = static_cast<size_t>(b.exponent - a.exponent);
+ if (shift < Bits)
+ a.shift_right(static_cast<int>(shift));
+ else
+ a = DyadicFloat<Bits>();
+ }
DyadicFloat<Bits> result;
diff --git a/libc/src/math/generic/expm1.cpp b/libc/src/math/generic/expm1.cpp
index 574c4b9aaf39f..150c0bbcf60da 100644
--- a/libc/src/math/generic/expm1.cpp
+++ b/libc/src/math/generic/expm1.cpp
@@ -25,7 +25,9 @@
#include "src/__support/integer_literals.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include <errno.h>
+#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0)
+#define LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
+#endif
// #define DEBUGDEBUG
@@ -51,7 +53,7 @@ constexpr double LOG2_E = 0x1.71547652b82fep+0;
constexpr uint64_t ERR_D = 0x3c08000000000000;
// Errors when using double-double precision.
// 0x1.0p-99
-constexpr uint64_t ERR_DD = 0x39c0000000000000;
+[[maybe_unused]] constexpr uint64_t ERR_DD = 0x39c0000000000000;
// -2^-12 * log(2)
// > a = -2^-12 * log(2);
@@ -108,7 +110,7 @@ DoubleDouble poly_approx_dd(const DoubleDouble &dx) {
// Return (exp(dx) - 1)/dx ~ 1 + dx / 2 + dx^2 / 6 + ... + dx^6 / 5040
// For |dx| < 2^-13 + 2^-30:
// | output - exp(dx) | < 2^-126.
-Float128 poly_approx_f128(const Float128 &dx) {
+[[maybe_unused]] Float128 poly_approx_f128(const Float128 &dx) {
constexpr Float128 COEFFS_128[]{
{Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1.0
{Sign::POS, -128, 0x80000000'00000000'00000000'00000000_u128}, // 0.5
@@ -127,13 +129,14 @@ Float128 poly_approx_f128(const Float128 &dx) {
#ifdef DEBUGDEBUG
std::ostream &operator<<(std::ostream &OS, const Float128 &r) {
- OS << (r.sign ? "-(" : "(") << r.mantissa.val[0] << " + " << r.mantissa.val[1]
- << " * 2^64) * 2^" << r.exponent << "\n";
+ OS << (r.sign == Sign::NEG ? "-(" : "(") << r.mantissa.val[0] << " + "
+ << r.mantissa.val[1] << " * 2^64) * 2^" << r.exponent << "\n";
return OS;
}
std::ostream &operator<<(std::ostream &OS, const DoubleDouble &r) {
- OS << std::hexfloat << r.hi << " + " << r.lo << std::defaultfloat << "\n";
+ OS << std::hexfloat << "(" << r.hi << " + " << r.lo << ")"
+ << std::defaultfloat << "\n";
return OS;
}
#endif
@@ -141,7 +144,7 @@ std::ostream &operator<<(std::ostream &OS, const DoubleDouble &r) {
// Compute exp(x) - 1 using 128-bit precision.
// TODO(lntue): investigate triple-double precision implementation for this
// step.
-Float128 expm1_f128(double x, double kd, int idx1, int idx2) {
+[[maybe_unused]] Float128 expm1_f128(double x, double kd, int idx1, int idx2) {
// Recalculate dx:
double t1 = fputil::multiply_add(kd, MLOG_2_EXP2_M12_HI, x); // exact
@@ -182,9 +185,10 @@ Float128 expm1_f128(double x, double kd, int idx1, int idx2) {
#ifdef DEBUGDEBUG
std::cout << "=== VERY SLOW PASS ===\n"
<< " kd: " << kd << "\n"
- << " dx: " << dx << "exp_mid_m1: " << exp_mid_m1
- << " exp_mid: " << exp_mid << " p: " << p
- << " r: " << r << std::endl;
+ << " hi: " << hi << "\n"
+ << " minus_one: " << minus_one << " dx: " << dx
+ << "exp_mid_m1: " << exp_mid_m1 << " exp_mid: " << exp_mid
+ << " p: " << p << " r: " << r << std::endl;
#endif
return r;
@@ -479,6 +483,12 @@ LLVM_LIBC_FUNCTION(double, expm1, (double x)) {
// Use double-double
DoubleDouble r_dd = exp_double_double(x, kd, exp_mid, hi_part);
+#ifdef LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
+ int64_t exp_hi = static_cast<int64_t>(hi) << FPBits::FRACTION_LEN;
+ double r =
+ cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(r_dd.hi + r_dd.lo));
+ return r;
+#else
double err_dd = cpp::bit_cast<double>(ERR_DD + err);
double upper_dd = r_dd.hi + (r_dd.lo + err_dd);
@@ -494,6 +504,7 @@ LLVM_LIBC_FUNCTION(double, expm1, (double x)) {
Float128 r_f128 = expm1_f128(x, kd, idx1, idx2);
return static_cast<double>(r_f128);
+#endif // LIBC_MATH_EXPM1_SKIP_ACCURATE_PASS
}
} // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/math/expm1_test.cpp b/libc/test/src/math/expm1_test.cpp
index 1bf07f19f3a7c..df5c08864bb8a 100644
--- a/libc/test/src/math/expm1_test.cpp
+++ b/libc/test/src/math/expm1_test.cpp
@@ -14,7 +14,6 @@
#include "test/UnitTest/Test.h"
#include "utils/MPFRWrapper/MPFRUtils.h"
-#include <errno.h>
#include <stdint.h>
using LlvmLibcExpm1Test = LIBC_NAMESPACE::testing::FPTest<double>;
@@ -23,34 +22,24 @@ namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
using LIBC_NAMESPACE::testing::tlog;
TEST_F(LlvmLibcExpm1Test, TrickyInputs) {
- constexpr int N = 21;
- constexpr uint64_t INPUTS[N] = {
- 0x3FD79289C6E6A5C0, // x=0x1.79289c6e6a5cp-2
- 0x3FD05DE80A173EA0, // x=0x1.05de80a173eap-2
- 0xbf1eb7a4cb841fcc, // x=-0x1.eb7a4cb841fccp-14
- 0xbf19a61fb925970d, // x=-0x1.9a61fb925970dp-14
- 0x3fda7b764e2cf47a, // x=0x1.a7b764e2cf47ap-2
- 0xc04757852a4b93aa, // x=-0x1.757852a4b93aap+5
- 0x4044c19e5712e377, // x=0x1.4c19e5712e377p+5
- 0xbf19a61fb925970d, // x=-0x1.9a61fb925970dp-14
- 0xc039a74cdab36c28, // x=-0x1.9a74cdab36c28p+4
- 0xc085b3e4e2e3bba9, // x=-0x1.5b3e4e2e3bba9p+9
- 0xc086960d591aec34, // x=-0x1.6960d591aec34p+9
- 0xc086232c09d58d91, // x=-0x1.6232c09d58d91p+9
- 0xc0874910d52d3051, // x=-0x1.74910d52d3051p9
- 0xc0867a172ceb0990, // x=-0x1.67a172ceb099p+9
- 0xc08ff80000000000, // x=-0x1.ff8p+9
- 0xbc971547652b82fe, // x=-0x1.71547652b82fep-54
- 0xbce465655f122ff6, // x=-0x1.465655f122ff6p-49
- 0x3d1bc8ee6b28659a, // x=0x1.bc8ee6b28659ap-46
- 0x3f18442b169f672d, // x=0x1.8442b169f672dp-14
- 0xc02b4f0cfb15ca0f, // x=-0x1.b4f0cfb15ca0fp+3
- 0xc042b708872320dd, // x=-0x1.2b708872320ddp+5
+ constexpr double INPUTS[] = {
+ 0x1.71547652b82fep-54, 0x1.465655f122ff6p-49, 0x1.bc8ee6b28659ap-46,
+ 0x1.8442b169f672dp-14, 0x1.9a61fb925970dp-14, 0x1.eb7a4cb841fccp-14,
+ 0x1.05de80a173eap-2, 0x1.79289c6e6a5cp-2, 0x1.a7b764e2cf47ap-2,
+ 0x1.b4f0cfb15ca0fp+3, 0x1.9a74cdab36c28p+4, 0x1.2b708872320ddp+5,
+ 0x1.4c19e5712e377p+5, 0x1.757852a4b93aap+5, 0x1.77f74111e0894p+6,
+ 0x1.a6c3780bbf824p+6, 0x1.e3d57e4c557f6p+6, 0x1.f07560077985ap+6,
+ 0x1.1f0da93354198p+7, 0x1.71018579c0758p+7, 0x1.204684c1167e9p+8,
+ 0x1.5b3e4e2e3bba9p+9, 0x1.6232c09d58d91p+9, 0x1.67a172ceb099p+9,
+ 0x1.6960d591aec34p+9, 0x1.74910d52d3051p+9, 0x1.ff8p+9,
};
+ constexpr int N = sizeof(INPUTS) / sizeof(INPUTS[0]);
for (int i = 0; i < N; ++i) {
- double x = FPBits(INPUTS[i]).get_val();
+ double x = INPUTS[i];
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
LIBC_NAMESPACE::expm1(x), 0.5);
+ EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, -x,
+ LIBC_NAMESPACE::expm1(-x), 0.5);
}
}
@@ -98,10 +87,10 @@ TEST_F(LlvmLibcExpm1Test, InDoubleRange) {
}
}
}
- tlog << " Expm1 failed: " << fails << "/" << count << "/" << cc
- << " tests.\n";
- tlog << " Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
if (fails) {
+ tlog << " Expm1 failed: " << fails << "/" << count << "/" << cc
+ << " tests.\n";
+ tlog << " Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
EXPECT_MPFR_MATCH(mpfr::Operation::Expm1, mx, mr, 0.5, rounding_mode);
}
};
diff --git a/libc/test/src/math/tan_test.cpp b/libc/test/src/math/tan_test.cpp
index e9e3e59f4d12d..80d57939a4f61 100644
--- a/libc/test/src/math/tan_test.cpp
+++ b/libc/test/src/math/tan_test.cpp
@@ -22,14 +22,15 @@ TEST_F(LlvmLibcTanTest, TrickyInputs) {
constexpr double INPUTS[] = {
0x1.d130383d17321p-27, 0x1.8000000000009p-23, 0x1.8000000000024p-22,
0x1.800000000009p-21, 0x1.20000000000f3p-20, 0x1.800000000024p-20,
- 0x1.e0000000001c2p-20, 0x1.0da8cc189b47dp-10, 0x1.00a33764a0a83p-7,
- 0x1.911a18779813fp-7, 0x1.940c877fb7dacp-7, 0x1.f42fb19b5b9b2p-6,
- 0x1.0285070f9f1bcp-5, 0x1.6ca9ef729af76p-1, 0x1.23f40dccdef72p+0,
- 0x1.43cf16358c9d7p+0, 0x1.addf3b9722265p+0, 0x1.ae78d360afa15p+0,
- 0x1.fe81868fc47fep+1, 0x1.e31b55306f22cp+2, 0x1.e639103a05997p+2,
- 0x1.f7898d5a756ddp+2, 0x1.1685973506319p+3, 0x1.5f09cad750ab1p+3,
- 0x1.aaf85537ea4c7p+3, 0x1.4f2b874135d27p+4, 0x1.13114266f9764p+4,
- 0x1.a211877de55dbp+4, 0x1.a5eece87e8606p+4, 0x1.a65d441ea6dcep+4,
+ 0x1.e0000000001c2p-20, 0x1.00452f0e0134dp-13, 0x1.0da8cc189b47dp-10,
+ 0x1.00a33764a0a83p-7, 0x1.911a18779813fp-7, 0x1.940c877fb7dacp-7,
+ 0x1.f42fb19b5b9b2p-6, 0x1.0285070f9f1bcp-5, 0x1.89f0f5241255bp-2,
+ 0x1.6ca9ef729af76p-1, 0x1.23f40dccdef72p+0, 0x1.43cf16358c9d7p+0,
+ 0x1.addf3b9722265p+0, 0x1.ae78d360afa15p+0, 0x1.fe81868fc47fep+1,
+ 0x1.e31b55306f22cp+2, 0x1.e639103a05997p+2, 0x1.f7898d5a756ddp+2,
+ 0x1.1685973506319p+3, 0x1.5f09cad750ab1p+3, 0x1.aaf85537ea4c7p+3,
+ 0x1.4f2b874135d27p+4, 0x1.13114266f9764p+4, 0x1.a211877de55dbp+4,
+ 0x1.a5eece87e8606p+4, 0x1.a65d441ea6dcep+4, 0x1.045457ae3994p+5,
0x1.1ffb509f3db15p+5, 0x1.2345d1e090529p+5, 0x1.c96e28eb679f8p+5,
0x1.da1838053b866p+5, 0x1.be886d9c2324dp+6, 0x1.ab514bfc61c76p+7,
0x1.14823229799c2p+7, 0x1.48ff1782ca91dp+8, 0x1.dcbfda0c7559ep+8,
@@ -42,6 +43,7 @@ TEST_F(LlvmLibcTanTest, TrickyInputs) {
0x1.6ac5b262ca1ffp+843, 0x1.8bb5847d49973p+845, 0x1.6ac5b262ca1ffp+849,
0x1.f08b14e1c4d0fp+890, 0x1.2b5fe88a9d8d5p+903, 0x1.a880417b7b119p+1023,
0x1.f6d7518808571p+1023,
+
};
constexpr int N = sizeof(INPUTS) / sizeof(INPUTS[0]);
``````````
</details>
https://github.com/llvm/llvm-project/pull/98048
More information about the libc-commits
mailing list