[libc-commits] [libc] 484319f - [libc] Make expm1f correctly rounded when the targets have no FMA instructions.
Tue Ly via libc-commits
libc-commits at lists.llvm.org
Fri Jun 3 12:58:06 PDT 2022
Author: Tue Ly
Date: 2022-06-03T15:57:48-04:00
New Revision: 484319f4972850d6e9bb3d433042cb56806ba183
URL: https://github.com/llvm/llvm-project/commit/484319f4972850d6e9bb3d433042cb56806ba183
DIFF: https://github.com/llvm/llvm-project/commit/484319f4972850d6e9bb3d433042cb56806ba183.diff
LOG: [libc] Make expm1f correctly rounded when the targets have no FMA instructions.
Add another exceptional value and fix the case when |x| is small.
Performance tests with CORE-MATH project scripts:
With FMA instructions on Ryzen 1700:
```
$ ./perf.sh expm1f
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
CORE-MATH reciprocal throughput : 15.362
System LIBC reciprocal throughput : 53.194
LIBC reciprocal throughput : 14.595
$ ./perf.sh expm1f --latency
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
CORE-MATH latency : 57.755
System LIBC latency : 147.020
LIBC latency : 60.269
```
Without FMA instructions:
```
$ ./perf.sh expm1f
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
CORE-MATH reciprocal throughput : 15.362
System LIBC reciprocal throughput : 53.300
LIBC reciprocal throughput : 18.020
$ ./perf.sh expm1f --latency
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
CORE-MATH latency : 57.758
System LIBC latency : 147.025
LIBC latency : 70.304
```
Reviewed By: michaelrj
Differential Revision: https://reviews.llvm.org/D123440
Added:
Modified:
libc/src/math/generic/expm1f.cpp
libc/test/src/math/CMakeLists.txt
libc/test/src/math/exhaustive/CMakeLists.txt
libc/test/src/math/exhaustive/expm1f_test.cpp
libc/test/src/math/expm1f_test.cpp
Removed:
################################################################################
diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp
index 2cb68e33ebd08..3019d71369c08 100644
--- a/libc/src/math/generic/expm1f.cpp
+++ b/libc/src/math/generic/expm1f.cpp
@@ -34,6 +34,15 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
return 0x1.8dbe62p-3f;
}
+#if !defined(LIBC_TARGET_HAS_FMA)
+ if (unlikely(x_u == 0xbdc1'c6cbU)) { // x = -0x1.838d96p-4f
+ int round_mode = fputil::get_round();
+ if (round_mode == FE_TONEAREST || round_mode == FE_DOWNWARD)
+ return -0x1.71c884p-4f;
+ return -0x1.71c882p-4f;
+ }
+#endif // LIBC_TARGET_HAS_FMA
+
// When |x| > 25*log(2), or nan
if (unlikely(x_abs >= 0x418a'a123U)) {
// x < log(2^-25)
@@ -70,19 +79,30 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
// x = -0.0f
if (unlikely(xbits.uintval() == 0x8000'0000U))
return x;
- // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
- // is:
- // |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
- // = |x|
- // < 2^-25
- // < epsilon(1)/2.
- // So the correctly rounded values of expm1(x) are:
- // = x + eps(x) if rounding mode = FE_UPWARD,
- // or (rounding mode = FE_TOWARDZERO and x is negative),
- // = x otherwise.
- // To simplify the rounding decision and make it more efficient, we use
- // fma(x, x, x) ~ x + x^2 instead.
- return fputil::multiply_add(x, x, x);
+ // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x
+ // is:
+ // |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x|
+ // = |x|
+ // < 2^-25
+ // < epsilon(1)/2.
+ // So the correctly rounded values of expm1(x) are:
+ // = x + eps(x) if rounding mode = FE_UPWARD,
+ // or (rounding mode = FE_TOWARDZERO and x is
+ // negative),
+ // = x otherwise.
+ // To simplify the rounding decision and make it more efficient, we use
+ // fma(x, x, x) ~ x + x^2 instead.
+ // Note: to use the formula x + x^2 to decide the correct rounding, we
+ // do need fma(x, x, x) to prevent underflow caused by x*x when |x| <
+ // 2^-76. For targets without FMA instructions, we simply use double for
+ // intermediate results as it is more efficient than using an emulated
+ // version of FMA.
+#if defined(LIBC_TARGET_HAS_FMA)
+ return fputil::fma(x, x, x);
+#else
+ double xd = x;
+ return static_cast<float>(fputil::multiply_add(xd, xd, xd));
+#endif // LIBC_TARGET_HAS_FMA
}
// 2^-25 <= |x| < 2^-4
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 956fb170462d0..68a9aed825a9f 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1189,9 +1189,6 @@ add_fp_unittest(
libc.src.__support.FPUtil.fputil
)
-# Without FMA instructions, the current expm1f implementation is not correctly
-# rounded for all float inputs (1 extra exceptional value). This will be fixed
-# in the followup patch: https://reviews.llvm.org/D123440
add_fp_unittest(
expm1f_test
NEED_MPFR
@@ -1204,8 +1201,6 @@ add_fp_unittest(
libc.include.math
libc.src.math.expm1f
libc.src.__support.FPUtil.fputil
- FLAGS
- FMA_OPT__ONLY
)
add_fp_unittest(
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
index b91f3c213a224..34f8a38772570 100644
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -92,7 +92,6 @@ add_fp_unittest(
DEPENDS
.exhaustive_test
libc.include.math
- libc.src.math.expf
libc.src.math.expm1f
libc.src.__support.FPUtil.fputil
LINK_LIBRARIES
diff --git a/libc/test/src/math/exhaustive/expm1f_test.cpp b/libc/test/src/math/exhaustive/expm1f_test.cpp
index 5131f1de6025e..b99a2e478d8b2 100644
--- a/libc/test/src/math/exhaustive/expm1f_test.cpp
+++ b/libc/test/src/math/exhaustive/expm1f_test.cpp
@@ -18,7 +18,7 @@ using FPBits = __llvm_libc::fputil::FPBits<float>;
namespace mpfr = __llvm_libc::testing::mpfr;
-struct LlvmLibcExpfExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
+struct LlvmLibcExpm1fExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
bool check(uint32_t start, uint32_t stop,
mpfr::RoundingMode rounding) override {
mpfr::ForceRoundingMode r(rounding);
@@ -40,21 +40,21 @@ static const int NUM_THREADS = std::thread::hardware_concurrency();
static constexpr uint32_t POS_START = 0x0000'0000U;
static constexpr uint32_t POS_STOP = 0x42b2'0000U;
-TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundNearestTieToEven) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundNearestTieToEven) {
test_full_range(POS_START, POS_STOP, NUM_THREADS,
mpfr::RoundingMode::Nearest);
}
-TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundUp) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundUp) {
test_full_range(POS_START, POS_STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
}
-TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundDown) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundDown) {
test_full_range(POS_START, POS_STOP, NUM_THREADS,
mpfr::RoundingMode::Downward);
}
-TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundTowardZero) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundTowardZero) {
test_full_range(POS_START, POS_STOP, NUM_THREADS,
mpfr::RoundingMode::TowardZero);
}
@@ -63,21 +63,21 @@ TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundTowardZero) {
static constexpr uint32_t NEG_START = 0x8000'0000U;
static constexpr uint32_t NEG_STOP = 0xc2d0'0000U;
-TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
mpfr::RoundingMode::Nearest);
}
-TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundUp) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundUp) {
test_full_range(NEG_START, NEG_STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
}
-TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundDown) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundDown) {
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
mpfr::RoundingMode::Downward);
}
-TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundTowardZero) {
+TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundTowardZero) {
test_full_range(NEG_START, NEG_STOP, NUM_THREADS,
mpfr::RoundingMode::TowardZero);
}
diff --git a/libc/test/src/math/expm1f_test.cpp b/libc/test/src/math/expm1f_test.cpp
index 78be76ec57f73..032badbb5c2be 100644
--- a/libc/test/src/math/expm1f_test.cpp
+++ b/libc/test/src/math/expm1f_test.cpp
@@ -97,6 +97,16 @@ TEST(LlvmLibcExpm1fTest, Borderline) {
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
__llvm_libc::expm1f(x), 0.5);
EXPECT_MATH_ERRNO(0);
+
+ x = float(FPBits(0x942ed494U));
+ ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
+ __llvm_libc::expm1f(x), 0.5);
+ EXPECT_MATH_ERRNO(0);
+
+ x = float(FPBits(0xbdc1c6cbU));
+ ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x,
+ __llvm_libc::expm1f(x), 0.5);
+ EXPECT_MATH_ERRNO(0);
}
TEST(LlvmLibcExpm1fTest, InFloatRange) {
More information about the libc-commits
mailing list