[libc-commits] [libc] 0ae409c - [libc][math] Slightly improve sinhf and coshf performance.

Tue Jun 20 06:27:34 PDT 2023

Author: Tue Ly
Date: 2023-06-20T09:27:28-04:00
New Revision: 0ae409c0d70d6251777351296599b60c81c54d2d

URL: https://github.com/llvm/llvm-project/commit/0ae409c0d70d6251777351296599b60c81c54d2d
DIFF: https://github.com/llvm/llvm-project/commit/0ae409c0d70d6251777351296599b60c81c54d2d.diff

LOG: [libc][math] Slightly improve sinhf and coshf performance.

Re-order exceptional branches and slightly adjust the evaluation.
Depends on https://reviews.llvm.org/D153026 .

Reviewed By: michaelrj

Differential Revision: https://reviews.llvm.org/D153062

Added: 
    

Modified: 
    libc/src/math/generic/coshf.cpp
    libc/src/math/generic/explogxf.h
    libc/src/math/generic/sinhf.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/math/generic/coshf.cpp b/libc/src/math/generic/coshf.cpp
index 8b30d51c69dce..67d2667711de9 100644

--- a/libc/src/math/generic/coshf.cpp
+++ b/libc/src/math/generic/coshf.cpp
@@ -23,13 +23,13 @@ LLVM_LIBC_FUNCTION(float, coshf, (float x)) {
 
   uint32_t x_u = xbits.uintval();
 
-  // |x| <= 2^-26
-  if (LIBC_UNLIKELY(x_u <= 0x3280'0000U)) {
-    return 1.0f + x;
-  }
-
   // When |x| >= 90, or x is inf or nan
-  if (LIBC_UNLIKELY(x_u >= 0x42b4'0000U)) {
+  if (LIBC_UNLIKELY(x_u >= 0x42b4'0000U || x_u <= 0x3280'0000U)) {
+    // |x| <= 2^-26
+    if (x_u <= 0x3280'0000U) {
+      return 1.0f + x;
+    }
+
     if (xbits.is_inf_or_nan())
       return x + FPBits::inf().get_val();
 

diff  --git a/libc/src/math/generic/explogxf.h b/libc/src/math/generic/explogxf.h
index cb8efc92f28c7..827762ca48aeb 100644
--- a/libc/src/math/generic/explogxf.h
+++ b/libc/src/math/generic/explogxf.h
@@ -16,6 +16,7 @@
 #include "src/__support/FPUtil/PolyEval.h"
 #include "src/__support/FPUtil/nearest_integer.h"
 #include "src/__support/common.h"
+#include "src/__support/macros/properties/cpu_features.h"
 
 #include <errno.h>
 
@@ -210,13 +211,24 @@ template <class Base> LIBC_INLINE exp_b_reduc_t exp_b_range_reduc(float x) {
 template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) {
   double xd = static_cast<double>(x);
 
-  // round(x * log2(e) * 2^5)
-  double kd = fputil::nearest_integer(ExpBase::LOG2_B * xd);
-
+  // kd = round(x * log2(e) * 2^5)
   // k_p = round(x * log2(e) * 2^5)
-  int k_p = static_cast<int>(kd);
   // k_m = round(-x * log2(e) * 2^5)
-  int k_m = -k_p;
+  double kd;
+  int k_p, k_m;
+
+#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT
+  kd = fputil::nearest_integer(ExpBase::LOG2_B * xd);
+  k_p = static_cast<int>(kd);
+  k_m = -k_p;
+#else
+  constexpr double HALF_WAY[2] = {0.5, -0.5};
+
+  k_p = static_cast<int>(
+      fputil::multiply_add(xd, ExpBase::LOG2_B, HALF_WAY[x < 0.0f]));
+  k_m = -k_p;
+  kd = static_cast<double>(k_p);
+#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT
 
   // hi = floor(kf * 2^(-5))
   // exp_hi = shift hi to the exponent field of double precision.
@@ -243,19 +255,19 @@ template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) {
   double dx2 = dx * dx;
 
   // c0 = 1 + COEFFS[0] * lo^2
-  // P_even = 1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4
-  double p_even =
-      fputil::polyeval(dx2, 1.0, ExpBase::COEFFS[0], ExpBase::COEFFS[2]);
-  // P_odd = 1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4
-  double p_odd =
-      fputil::polyeval(dx2, 1.0, ExpBase::COEFFS[1], ExpBase::COEFFS[3]);
+  // P_even = (1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4) / 2
+  double p_even = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[0] * 0.5,
+                                   ExpBase::COEFFS[2] * 0.5);
+  // P_odd = (1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4) / 2
+  double p_odd = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[1] * 0.5,
+                                  ExpBase::COEFFS[3] * 0.5);
 
   double r;
   if constexpr (is_sinh)
     r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_
diff );
   else
     r = fputil::multiply_add(dx * mh_
diff , p_odd, p_even * mh_sum);
-  return 0.5 * r;
+  return r;
 }
 
 // x should be positive, normal finite value

diff  --git a/libc/src/math/generic/sinhf.cpp b/libc/src/math/generic/sinhf.cpp
index 7f4d0d6e3af2e..3ebfe6ba07009 100644
--- a/libc/src/math/generic/sinhf.cpp
+++ b/libc/src/math/generic/sinhf.cpp
@@ -17,23 +17,43 @@ namespace __llvm_libc {
 LLVM_LIBC_FUNCTION(float, sinhf, (float x)) {
   using FPBits = typename fputil::FPBits<float>;
   FPBits xbits(x);
-  bool sign = xbits.get_sign();
   uint32_t x_abs = xbits.uintval() & FPBits::FloatProp::EXP_MANT_MASK;
 
-  // |x| <= 2^-26
-  if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) {
-    return static_cast<float>(
-        LIBC_UNLIKELY(x_abs == 0) ? x : (x + 0.25 * x * x * x));
-  }
-
   // When |x| >= 90, or x is inf or nan
-  if (LIBC_UNLIKELY(x_abs >= 0x42b4'0000U)) {
+  if (LIBC_UNLIKELY(x_abs >= 0x42b4'0000U || x_abs <= 0x3da0'0000U)) {
+    // |x| <= 0.078125
+    if (x_abs <= 0x3da0'0000U) {
+      // |x| = 0.0005589424981735646724700927734375
+      if (LIBC_UNLIKELY(x_abs == 0x3a12'85ffU)) {
+        if (fputil::fenv_is_round_to_nearest())
+          return x;
+      }
+
+      // |x| <= 2^-26
+      if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) {
+        return static_cast<float>(
+            LIBC_UNLIKELY(x_abs == 0) ? x : (x + 0.25 * x * x * x));
+      }
+
+      double xdbl = x;
+      double x2 = xdbl * xdbl;
+      // Sollya: fpminimax(sinh(x),[|3,5,7|],[|D...|],[-1/16-1/64;1/16+1/64],x);
+      // Sollya output: x * (0x1p0 + x^0x1p1 * (0x1.5555555556583p-3 + x^0x1p1
+      //                  * (0x1.111110d239f1fp-7
+      //                  + x^0x1p1 * 0x1.a02b5a284013cp-13)))
+      // Therefore, output of Sollya = x * pe;
+      double pe = fputil::polyeval(x2, 0.0, 0x1.5555555556583p-3,
+                                   0x1.111110d239f1fp-7, 0x1.a02b5a284013cp-13);
+      return static_cast<float>(fputil::multiply_add(xdbl, pe, xdbl));
+    }
+
     if (xbits.is_nan())
       return x + 1.0f; // sNaN to qNaN + signal
 
     if (xbits.is_inf())
       return x;
 
+    bool sign = xbits.get_sign();
     int rounding = fputil::quick_get_round();
     if (sign) {
       if (LIBC_UNLIKELY(rounding == FE_UPWARD || rounding == FE_TOWARDZERO))
@@ -50,26 +70,6 @@ LLVM_LIBC_FUNCTION(float, sinhf, (float x)) {
     return x + FPBits::inf(sign).get_val();
   }
 
-  // |x| <= 0.078125
-  if (LIBC_UNLIKELY(x_abs <= 0x3da0'0000U)) {
-    // |x| = 0.0005589424981735646724700927734375
-    if (LIBC_UNLIKELY(x_abs == 0x3a12'85ffU)) {
-      if (fputil::fenv_is_round_to_nearest())
-        return x;
-    }
-
-    double xdbl = x;
-    double x2 = xdbl * xdbl;
-    // Sollya: fpminimax(sinh(x),[|3,5,7|],[|D...|],[-1/16-1/64;1/16+1/64],x);
-    // Sollya output: x * (0x1p0 + x^0x1p1 * (0x1.5555555556583p-3 + x^0x1p1
-    //                  * (0x1.111110d239f1fp-7
-    //                  + x^0x1p1 * 0x1.a02b5a284013cp-13)))
-    // Therefore, output of Sollya = x * pe;
-    double pe = fputil::polyeval(x2, 0.0, 0x1.5555555556583p-3,
-                                 0x1.111110d239f1fp-7, 0x1.a02b5a284013cp-13);
-    return static_cast<float>(fputil::multiply_add(xdbl, pe, xdbl));
-  }
-
   // sinh(x) = (e^x - e^(-x)) / 2.
   return static_cast<float>(exp_pm_eval</*is_sinh*/ true>(x));
 }