[libc-commits] [libc] [libc][math][c23] Add expf16 C23 math function (PR #100632)

Thu Jul 25 12:03:11 PDT 2024

================
@@ -0,0 +1,153 @@
+//===-- Half-precision e^x function ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/expf16.h"
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "src/__support/CPP/array.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+static constexpr fputil::ExceptValues<float16, 5> EXPF16_EXCEPTS = {{
+    // Fields: (input bits, RZ output bits, RU offset, RD offset, RN offset)
+    // x = 0x1.de4p-8, expf16(x) = 0x1.01cp+0 (RZ)
+    {0x1f79U, 0x3c07U, 1U, 0U, 0U},
+    // x = 0x1.73cp-6, expf16(x) = 0x1.05cp+0 (RZ)
+    {0x25cfU, 0x3c17U, 1U, 0U, 0U},
+    // x = 0x1.c34p+0, expf16(x) = 0x1.74cp+2 (RZ)
+    {0x3f0dU, 0x45d3U, 1U, 0U, 1U},
+    // x = -0x1.488p-5, expf16(x) = 0x1.ebcp-1 (RZ)
+    {0xa922U, 0x3bafU, 1U, 0U, 0U},
+    // x = -0x1.55p-5, expf16(x) = 0x1.ebp-1 (RZ)
+    {0xa954U, 0x3bacU, 1U, 0U, 0U},
+}};
+
+// exp(i) for i = -18..12 as float (RN). Generated by Sollya with the commands:
+//   > display = hexadecimal;
+//   > for i from -18 to 12 do print(round(exp(i), SG, RN));
+static constexpr cpp::array<float, 31> EXP_HI = {
+    0x1.05a628p-26f, 0x1.639e32p-25f, 0x1.e355bcp-24f, 0x1.4875cap-22f,
+    0x1.be6c7p-21f,  0x1.2f6054p-19f, 0x1.9c54c4p-18f, 0x1.183542p-16f,
+    0x1.7cd79cp-15f, 0x1.02cf22p-13f, 0x1.5fc21p-12f,  0x1.de16bap-11f,
+    0x1.44e52p-9f,   0x1.b993fep-8f,  0x1.2c155cp-6f,  0x1.97db0cp-5f,
+    0x1.152aaap-3f,  0x1.78b564p-2f,  0x1p+0f,         0x1.5bf0a8p+1f,
+    0x1.d8e64cp+2f,  0x1.415e5cp+4f,  0x1.b4c902p+5f,  0x1.28d38ap+7f,
+    0x1.936dc6p+8f,  0x1.122886p+10f, 0x1.749ea8p+11f, 0x1.fa7158p+12f,
+    0x1.5829dcp+14f, 0x1.d3c448p+15f, 0x1.3de166p+17f,
+};
+
+// exp(i/8) for i = 0..7 as float (RN). Generated by Sollya with the commands:
+//   > display = hexadecimal;
+//   > for i from 0 to 7 do print(round(exp(i * 2^-3), SG, RN));
+static constexpr cpp::array<float, 8> EXP_MID = {
+    0x1p+0f,        0x1.221604p+0f, 0x1.48b5e4p+0f, 0x1.747a52p+0f,
+    0x1.a61298p+0f, 0x1.de455ep+0f, 0x1.0ef9dcp+1f, 0x1.330e58p+1f,
+};
+
+LLVM_LIBC_FUNCTION(float16, expf16, (float16 x)) {
+  using FPBits = fputil::FPBits<float16>;
+  FPBits x_bits(x);
+
+  uint16_t x_u = x_bits.uintval();
+  uint16_t x_abs = x_u & 0x7fffU;
+
+  // When |x| >= 12, or x is NaN.
+  if (LIBC_UNLIKELY(x_abs >= 0x4a00U)) {
+    // exp(NaN) = NaN
+    if (x_bits.is_nan()) {
+      if (x_bits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+
+    // When x >= 12.
+    if (x_bits.is_pos() && x_u >= 0x4a00U) {
----------------
overmighty wrote:

Comparing the integer value instead of the floating-point value here and in the second `if` below reduced the average runtime in the normal range:

- from 39.6 ns/op to 38.5 ns/op (24.3 s vs 23.6 s total time) without `-march=native`,
- from 37.3 ns/op to 37.0 ns/op (22.9 s vs 22.7 s total time) with `-march=native`,

on i7-13700H.

https://github.com/llvm/llvm-project/pull/100632