[libc-commits] [libc] 0ae409c - [libc][math] Slightly improve sinhf and coshf performance.
Tue Ly via libc-commits
libc-commits at lists.llvm.org
Tue Jun 20 06:27:34 PDT 2023
Author: Tue Ly
Date: 2023-06-20T09:27:28-04:00
New Revision: 0ae409c0d70d6251777351296599b60c81c54d2d
URL: https://github.com/llvm/llvm-project/commit/0ae409c0d70d6251777351296599b60c81c54d2d
DIFF: https://github.com/llvm/llvm-project/commit/0ae409c0d70d6251777351296599b60c81c54d2d.diff
LOG: [libc][math] Slightly improve sinhf and coshf performance.
Re-order exceptional branches and slightly adjust the evaluation.
Depends on https://reviews.llvm.org/D153026 .
Reviewed By: michaelrj
Differential Revision: https://reviews.llvm.org/D153062
Added:
Modified:
libc/src/math/generic/coshf.cpp
libc/src/math/generic/explogxf.h
libc/src/math/generic/sinhf.cpp
Removed:
################################################################################
diff --git a/libc/src/math/generic/coshf.cpp b/libc/src/math/generic/coshf.cpp
index 8b30d51c69dce..67d2667711de9 100644
--- a/libc/src/math/generic/coshf.cpp
+++ b/libc/src/math/generic/coshf.cpp
@@ -23,13 +23,13 @@ LLVM_LIBC_FUNCTION(float, coshf, (float x)) {
uint32_t x_u = xbits.uintval();
- // |x| <= 2^-26
- if (LIBC_UNLIKELY(x_u <= 0x3280'0000U)) {
- return 1.0f + x;
- }
-
// When |x| >= 90, or x is inf or nan
- if (LIBC_UNLIKELY(x_u >= 0x42b4'0000U)) {
+ if (LIBC_UNLIKELY(x_u >= 0x42b4'0000U || x_u <= 0x3280'0000U)) {
+ // |x| <= 2^-26
+ if (x_u <= 0x3280'0000U) {
+ return 1.0f + x;
+ }
+
if (xbits.is_inf_or_nan())
return x + FPBits::inf().get_val();
diff --git a/libc/src/math/generic/explogxf.h b/libc/src/math/generic/explogxf.h
index cb8efc92f28c7..827762ca48aeb 100644
--- a/libc/src/math/generic/explogxf.h
+++ b/libc/src/math/generic/explogxf.h
@@ -16,6 +16,7 @@
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/nearest_integer.h"
#include "src/__support/common.h"
+#include "src/__support/macros/properties/cpu_features.h"
#include <errno.h>
@@ -210,13 +211,24 @@ template <class Base> LIBC_INLINE exp_b_reduc_t exp_b_range_reduc(float x) {
template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) {
double xd = static_cast<double>(x);
- // round(x * log2(e) * 2^5)
- double kd = fputil::nearest_integer(ExpBase::LOG2_B * xd);
-
+ // kd = round(x * log2(e) * 2^5)
// k_p = round(x * log2(e) * 2^5)
- int k_p = static_cast<int>(kd);
// k_m = round(-x * log2(e) * 2^5)
- int k_m = -k_p;
+ double kd;
+ int k_p, k_m;
+
+#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT
+ kd = fputil::nearest_integer(ExpBase::LOG2_B * xd);
+ k_p = static_cast<int>(kd);
+ k_m = -k_p;
+#else
+ constexpr double HALF_WAY[2] = {0.5, -0.5};
+
+ k_p = static_cast<int>(
+ fputil::multiply_add(xd, ExpBase::LOG2_B, HALF_WAY[x < 0.0f]));
+ k_m = -k_p;
+ kd = static_cast<double>(k_p);
+#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT
// hi = floor(kf * 2^(-5))
// exp_hi = shift hi to the exponent field of double precision.
@@ -243,19 +255,19 @@ template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) {
double dx2 = dx * dx;
// c0 = 1 + COEFFS[0] * lo^2
- // P_even = 1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4
- double p_even =
- fputil::polyeval(dx2, 1.0, ExpBase::COEFFS[0], ExpBase::COEFFS[2]);
- // P_odd = 1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4
- double p_odd =
- fputil::polyeval(dx2, 1.0, ExpBase::COEFFS[1], ExpBase::COEFFS[3]);
+ // P_even = (1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4) / 2
+ double p_even = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[0] * 0.5,
+ ExpBase::COEFFS[2] * 0.5);
+ // P_odd = (1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4) / 2
+ double p_odd = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[1] * 0.5,
+ ExpBase::COEFFS[3] * 0.5);
double r;
if constexpr (is_sinh)
r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_
diff );
else
r = fputil::multiply_add(dx * mh_
diff , p_odd, p_even * mh_sum);
- return 0.5 * r;
+ return r;
}
// x should be positive, normal finite value
diff --git a/libc/src/math/generic/sinhf.cpp b/libc/src/math/generic/sinhf.cpp
index 7f4d0d6e3af2e..3ebfe6ba07009 100644
--- a/libc/src/math/generic/sinhf.cpp
+++ b/libc/src/math/generic/sinhf.cpp
@@ -17,23 +17,43 @@ namespace __llvm_libc {
LLVM_LIBC_FUNCTION(float, sinhf, (float x)) {
using FPBits = typename fputil::FPBits<float>;
FPBits xbits(x);
- bool sign = xbits.get_sign();
uint32_t x_abs = xbits.uintval() & FPBits::FloatProp::EXP_MANT_MASK;
- // |x| <= 2^-26
- if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) {
- return static_cast<float>(
- LIBC_UNLIKELY(x_abs == 0) ? x : (x + 0.25 * x * x * x));
- }
-
// When |x| >= 90, or x is inf or nan
- if (LIBC_UNLIKELY(x_abs >= 0x42b4'0000U)) {
+ if (LIBC_UNLIKELY(x_abs >= 0x42b4'0000U || x_abs <= 0x3da0'0000U)) {
+ // |x| <= 0.078125
+ if (x_abs <= 0x3da0'0000U) {
+ // |x| = 0.0005589424981735646724700927734375
+ if (LIBC_UNLIKELY(x_abs == 0x3a12'85ffU)) {
+ if (fputil::fenv_is_round_to_nearest())
+ return x;
+ }
+
+ // |x| <= 2^-26
+ if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) {
+ return static_cast<float>(
+ LIBC_UNLIKELY(x_abs == 0) ? x : (x + 0.25 * x * x * x));
+ }
+
+ double xdbl = x;
+ double x2 = xdbl * xdbl;
+ // Sollya: fpminimax(sinh(x),[|3,5,7|],[|D...|],[-1/16-1/64;1/16+1/64],x);
+ // Sollya output: x * (0x1p0 + x^0x1p1 * (0x1.5555555556583p-3 + x^0x1p1
+ // * (0x1.111110d239f1fp-7
+ // + x^0x1p1 * 0x1.a02b5a284013cp-13)))
+ // Therefore, output of Sollya = x * pe;
+ double pe = fputil::polyeval(x2, 0.0, 0x1.5555555556583p-3,
+ 0x1.111110d239f1fp-7, 0x1.a02b5a284013cp-13);
+ return static_cast<float>(fputil::multiply_add(xdbl, pe, xdbl));
+ }
+
if (xbits.is_nan())
return x + 1.0f; // sNaN to qNaN + signal
if (xbits.is_inf())
return x;
+ bool sign = xbits.get_sign();
int rounding = fputil::quick_get_round();
if (sign) {
if (LIBC_UNLIKELY(rounding == FE_UPWARD || rounding == FE_TOWARDZERO))
@@ -50,26 +70,6 @@ LLVM_LIBC_FUNCTION(float, sinhf, (float x)) {
return x + FPBits::inf(sign).get_val();
}
- // |x| <= 0.078125
- if (LIBC_UNLIKELY(x_abs <= 0x3da0'0000U)) {
- // |x| = 0.0005589424981735646724700927734375
- if (LIBC_UNLIKELY(x_abs == 0x3a12'85ffU)) {
- if (fputil::fenv_is_round_to_nearest())
- return x;
- }
-
- double xdbl = x;
- double x2 = xdbl * xdbl;
- // Sollya: fpminimax(sinh(x),[|3,5,7|],[|D...|],[-1/16-1/64;1/16+1/64],x);
- // Sollya output: x * (0x1p0 + x^0x1p1 * (0x1.5555555556583p-3 + x^0x1p1
- // * (0x1.111110d239f1fp-7
- // + x^0x1p1 * 0x1.a02b5a284013cp-13)))
- // Therefore, output of Sollya = x * pe;
- double pe = fputil::polyeval(x2, 0.0, 0x1.5555555556583p-3,
- 0x1.111110d239f1fp-7, 0x1.a02b5a284013cp-13);
- return static_cast<float>(fputil::multiply_add(xdbl, pe, xdbl));
- }
-
// sinh(x) = (e^x - e^(-x)) / 2.
return static_cast<float>(exp_pm_eval</*is_sinh*/ true>(x));
}
More information about the libc-commits
mailing list