[libc-commits] [PATCH] D127951: [libc][math] New common algorithm for expf/expm1f/exp2f.

Thu Jun 16 07:11:05 PDT 2022

lntue added inline comments.

================
Comment at: libc/src/math/generic/common_constants.cpp:106
+// Wolfram alpha: N[Table[2^x-1,{x,-16/32,15/32,1/32}],27]
+const double EXP_M[EXP_num_p] = {-0.292893218813452475599155638,
+                                 -0.277409596511476689981496879,
----------------
Use hexadecimal floats for constants.

================
Comment at: libc/src/math/generic/common_constants.h:20-21

-// Lookup table for exp(m) with m = -104, ..., 89.
-//   -104 = floor(log(single precision's min denormal))
-//     89 = ceil(log(single precision's max normal))
-// Table is generated with Sollya as follow:
-// > display = hexadecimal;
-// > for i from -104 to 89 do { D(exp(i)); };
-extern const double EXP_M1[195];
-
-// Lookup table for exp(m * 2^(-7)) with m = 0, ..., 127.
-// Table is generated with Sollya as follow:
-// > display = hexadecimal;
-// > for i from 0 to 127 do { D(exp(i / 128)); };
-extern const double EXP_M2[128];
+static constexpr int EXP_bits_p = 5;
+static constexpr int EXP_num_p = 1 << EXP_bits_p;
+
----------------
Use all caps for constants.

================
Comment at: libc/src/math/generic/exp2f.cpp:22

-// Lookup table for 2^(m * 2^(-6)) with m = 0, ..., 63.
-// Table is generated with Sollya as follow:
-// > display = hexadecimal;
-// > for i from 0 to 63 do { D(2^(i / 64)); };
-static constexpr double EXP_M[64] = {
-    0x1.0000000000000p0, 0x1.02c9a3e778061p0, 0x1.059b0d3158574p0,
-    0x1.0874518759bc8p0, 0x1.0b5586cf9890fp0, 0x1.0e3ec32d3d1a2p0,
-    0x1.11301d0125b51p0, 0x1.1429aaea92de0p0, 0x1.172b83c7d517bp0,
-    0x1.1a35beb6fcb75p0, 0x1.1d4873168b9aap0, 0x1.2063b88628cd6p0,
-    0x1.2387a6e756238p0, 0x1.26b4565e27cddp0, 0x1.29e9df51fdee1p0,
-    0x1.2d285a6e4030bp0, 0x1.306fe0a31b715p0, 0x1.33c08b26416ffp0,
-    0x1.371a7373aa9cbp0, 0x1.3a7db34e59ff7p0, 0x1.3dea64c123422p0,
-    0x1.4160a21f72e2ap0, 0x1.44e086061892dp0, 0x1.486a2b5c13cd0p0,
-    0x1.4bfdad5362a27p0, 0x1.4f9b2769d2ca7p0, 0x1.5342b569d4f82p0,
-    0x1.56f4736b527dap0, 0x1.5ab07dd485429p0, 0x1.5e76f15ad2148p0,
-    0x1.6247eb03a5585p0, 0x1.6623882552225p0, 0x1.6a09e667f3bcdp0,
-    0x1.6dfb23c651a2fp0, 0x1.71f75e8ec5f74p0, 0x1.75feb564267c9p0,
-    0x1.7a11473eb0187p0, 0x1.7e2f336cf4e62p0, 0x1.82589994cce13p0,
-    0x1.868d99b4492edp0, 0x1.8ace5422aa0dbp0, 0x1.8f1ae99157736p0,
-    0x1.93737b0cdc5e5p0, 0x1.97d829fde4e50p0, 0x1.9c49182a3f090p0,
-    0x1.a0c667b5de565p0, 0x1.a5503b23e255dp0, 0x1.a9e6b5579fdbfp0,
-    0x1.ae89f995ad3adp0, 0x1.b33a2b84f15fbp0, 0x1.b7f76f2fb5e47p0,
-    0x1.bcc1e904bc1d2p0, 0x1.c199bdd85529cp0, 0x1.c67f12e57d14bp0,
-    0x1.cb720dcef9069p0, 0x1.d072d4a07897cp0, 0x1.d5818dcfba487p0,
-    0x1.da9e603db3285p0, 0x1.dfc97337b9b5fp0, 0x1.e502ee78b3ff6p0,
-    0x1.ea4afa2a490dap0, 0x1.efa1bee615a27p0, 0x1.f50765b6e4540p0,
-    0x1.fa7c1819e90d8p0,
-};
+constexpr double l2h = 0.6931471787393093109130859375;
+constexpr double l2l = 1.8206359985041461839581765680755E-9;
----------------
Use hexadecimal floats for constants and provide how are these constants generated.  Also use caps and more descriptive names.

================
Comment at: libc/src/math/generic/exp2f.cpp:73

-  // x_hi = round(hi + mid).
-  // The default rounding mode for float-to-int conversion in C++ is
-  // round-toward-zero. To make it round-to-nearest, we add (-1)^sign(x) * 0.5
-  // before conversion.
-  int x_hi =
-      static_cast<int>(x * 0x1.0p+6f + (xbits.get_sign() ? -0.5f : 0.5f));
-  // For 2-complement integers, arithmetic right shift is the same as dividing
-  // by a power of 2 and then round down (toward negative infinity).
-  int e_hi = (x_hi >> 6) +
-             static_cast<int>(fputil::FloatProperties<double>::EXPONENT_BIAS);
-  fputil::FPBits<double> exp_hi(
-      static_cast<uint64_t>(e_hi)
-      << fputil::FloatProperties<double>::MANTISSA_WIDTH);
-  // mid = x_hi & 0x0000'003fU;
-  double exp_hi_mid = static_cast<double>(exp_hi) * EXP_M[x_hi & 0x3f];
-  // Subtract (hi + mid) from x to get lo.
-  x -= static_cast<float>(x_hi) * 0x1.0p-6f;
-  double xd = static_cast<double>(x);
-  // Degree-4 minimax polynomial generated by Sollya with the following
-  // commands:
-  //   > display = hexadecimal;
-  //   > Q = fpminimax((2^x - 1)/x, 3, [|D...|], [-2^-7, 2^-7]);
-  //   > Q;
-  double exp_lo =
-      fputil::polyeval(xd, 0x1p0, 0x1.62e42fefa2417p-1, 0x1.ebfbdff82f809p-3,
-                       0x1.c6b0b92131c47p-5, 0x1.3b2ab6fb568a3p-7);
-  double result = exp_hi_mid * exp_lo;
-  return static_cast<float>(result);
+  if (unlikely(x_u & exval_mask) == exval_mask) {
+    if (unlikely(x_u == exval1)) { // x = 0x1.853a6ep-9f
----------------
Maybe you can just inline `exval1`, `exval2`, and `exval_mask` here, with the comment explaining how `exval_mask` is obtained.

================
Comment at: libc/src/math/generic/exp2f.cpp:83-89
+  double xdbl = x;
+  int ps = xdbl * mlp + 16384.5;
+  double psx = mld * (ps - 16384.0);
+  ps += 1 << (EXP_bits_p - 1);
+  int dg = (ps >> EXP_bits_p) - (16384 >> EXP_bits_p);
+  double ml = EXP_M[ps & (EXP_num_p - 1)];
+  double dx = xdbl - psx;
----------------
Add comments explain the range reduction computations in detail.

================
Comment at: libc/src/math/generic/exp2f.cpp:91
+  // N[Table[Ln[2]^n/n!,{n,1,6}],30]
+  double pe = dx * fputil::polyeval(dx, l2l, 0.240226506959100712333551263163,
+                                    0.0555041086648215799531422637686,
----------------
Use hexadecimal floats for constants.  Also you might want to try to combine a bit to take advantage of FMA when it's available, either
```
  multiply_add(dx, polyeval(...), l2h*dx);
```
or
```
   polyeval(dx, l2h*dx, l2l, ...);
```
Those 2 are actually the same, and when there is no FMA.  Performance tests should show if it improves when FMA is available or not.

================
Comment at: libc/src/math/generic/exp2f.cpp:98
+
+  double result = ml * pe + pe + ml + 1.0;
+  fputil::FPBits<double> bs(result);
----------------
Maybe combining this to `multiply_add(ml, pe, pe + ml + 1.0` to take advantage of FMA if available.

================
Comment at: libc/test/src/math/CMakeLists.txt:1199
   NEED_MPFR
-  SUITE
+    SUITE
     libc_math_unittests
----------------
Indentation.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D127951/new/

https://reviews.llvm.org/D127951