[libc-commits] [libc] [libc][math][c23] Add expf16 C23 math function (PR #100632)
via libc-commits
libc-commits at lists.llvm.org
Thu Jul 25 12:03:09 PDT 2024
================
@@ -0,0 +1,153 @@
+//===-- Half-precision e^x function ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/expf16.h"
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "src/__support/CPP/array.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+static constexpr fputil::ExceptValues<float16, 5> EXPF16_EXCEPTS = {{
+ // Entry layout: (input bits, RZ output bits, RU offset, RD offset, RN offset); the offsets are presumably applied to the RZ output for those rounding modes.
+ // x = 0x1.de4p-8, expf16(x) = 0x1.01cp+0 (RZ)
+ {0x1f79U, 0x3c07U, 1U, 0U, 0U},
+ // x = 0x1.73cp-6, expf16(x) = 0x1.05cp+0 (RZ)
+ {0x25cfU, 0x3c17U, 1U, 0U, 0U},
+ // x = 0x1.c34p+0, expf16(x) = 0x1.74cp+2 (RZ)
+ {0x3f0dU, 0x45d3U, 1U, 0U, 1U},
+ // x = -0x1.488p-5, expf16(x) = 0x1.ebcp-1 (RZ)
+ {0xa922U, 0x3bafU, 1U, 0U, 0U},
+ // x = -0x1.55p-5, expf16(x) = 0x1.ebp-1 (RZ)
+ {0xa954U, 0x3bacU, 1U, 0U, 0U},
+}};
+
+// EXP_HI[i + 18] holds exp(i) as a float for integer i in [-18, 12]. Generated by Sollya with the following commands:
+// > display = hexadecimal;
+// > for i from -18 to 12 do print(round(exp(i), SG, RN));
+static constexpr cpp::array<float, 31> EXP_HI = {
+ 0x1.05a628p-26f, 0x1.639e32p-25f, 0x1.e355bcp-24f, 0x1.4875cap-22f,
+ 0x1.be6c7p-21f, 0x1.2f6054p-19f, 0x1.9c54c4p-18f, 0x1.183542p-16f,
+ 0x1.7cd79cp-15f, 0x1.02cf22p-13f, 0x1.5fc21p-12f, 0x1.de16bap-11f,
+ 0x1.44e52p-9f, 0x1.b993fep-8f, 0x1.2c155cp-6f, 0x1.97db0cp-5f,
+ 0x1.152aaap-3f, 0x1.78b564p-2f, 0x1p+0f, 0x1.5bf0a8p+1f,
+ 0x1.d8e64cp+2f, 0x1.415e5cp+4f, 0x1.b4c902p+5f, 0x1.28d38ap+7f,
+ 0x1.936dc6p+8f, 0x1.122886p+10f, 0x1.749ea8p+11f, 0x1.fa7158p+12f,
+ 0x1.5829dcp+14f, 0x1.d3c448p+15f, 0x1.3de166p+17f,
+};
+
+// EXP_MID[i] holds exp(i * 2^-3) as a float for integer i in [0, 7]. Generated by Sollya with the following commands:
+// > display = hexadecimal;
+// > for i from 0 to 7 do print(round(exp(i * 2^-3), SG, RN));
+static constexpr cpp::array<float, 8> EXP_MID = {
+ 0x1p+0f, 0x1.221604p+0f, 0x1.48b5e4p+0f, 0x1.747a52p+0f,
+ 0x1.a61298p+0f, 0x1.de455ep+0f, 0x1.0ef9dcp+1f, 0x1.330e58p+1f,
+};
+
+LLVM_LIBC_FUNCTION(float16, expf16, (float16 x)) {
+ using FPBits = fputil::FPBits<float16>;
+ FPBits x_bits(x);
+
+ uint16_t x_u = x_bits.uintval();
+ uint16_t x_abs = x_u & 0x7fffU;
+
+ // When |x| >= 12, or x is NaN.
+ if (LIBC_UNLIKELY(x_abs >= 0x4a00U)) {
+ // exp(NaN) = NaN
+ if (x_bits.is_nan()) {
+ if (x_bits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+
+ return x;
+ }
+
+ // When x >= 12.
+ if (x_bits.is_pos() && x_u >= 0x4a00U) {
+ // exp(+inf) = +inf
+ if (x_bits.is_inf())
+ return FPBits::inf().get_val();
+
+ switch (fputil::quick_get_round()) {
+ case FE_TONEAREST:
+ case FE_UPWARD:
+ fputil::set_errno_if_required(ERANGE);
+ fputil::raise_except_if_required(FE_OVERFLOW);
+ return FPBits::inf().get_val();
+ default:
+ return FPBits::max_normal().get_val();
+ }
+ }
+
+ // When x <= -18.
+ if (x_u >= 0xcc80U) {
+ // exp(-inf) = +0
+ if (x_bits.is_inf())
+ return FPBits::zero().get_val();
+
+ fputil::set_errno_if_required(ERANGE);
+ fputil::raise_except_if_required(FE_UNDERFLOW | FE_INEXACT);
+
+ switch (fputil::quick_get_round()) {
+ case FE_UPWARD:
+ return FPBits::min_subnormal().get_val();
+ default:
+ return FPBits::zero().get_val();
+ }
+ }
+ }
+
+ if (auto r = EXPF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+ return r.value();
+
+ // For -18 < x < 12, to compute exp(x), we perform the following range
+ // reduction: find hi, mid, lo, such that:
+ // x = hi + mid + lo, in which
+ // hi is an integer,
+ // mid * 2^3 is an integer,
+ // -2^(-4) <= lo < 2^(-4).
+ // In particular,
+ // hi + mid = round(x * 2^3) * 2^(-3).
+ // Then,
+ // exp(x) = exp(hi + mid + lo) = exp(hi) * exp(mid) * exp(lo).
+ // We store exp(hi) and exp(mid) in the lookup tables EXP_HI and EXP_MID
+ // respectively. exp(lo) is computed using a degree-3 minimax polynomial
+ // generated by Sollya.
+
+ float xf = static_cast<float>(x);
+ float kf = fputil::nearest_integer(xf * 0x1.0p+3f);
+ int x_hi_mid = static_cast<int>(kf);
+ int x_hi = x_hi_mid >> 3;
+ int x_mid = x_hi_mid & 0x7;
+ // lo = x - (hi + mid) = round(x * 2^3) * (-2^(-3)) + x
+ float lo = fputil::multiply_add(kf, -0x1.0p-3f, xf);
+
+ float exp_hi = EXP_HI[x_hi + 18];
+ float exp_mid = EXP_MID[x_mid];
+ // Degree-3 minimax polynomial generated by Sollya with the following
+ // commands:
+ // > display = hexadecimal;
+ // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-4, 2^-4]);
+ // > P;
+ float exp_lo =
+ fputil::polyeval(lo, 0x1p+0f, 0x1p+0f, 0x1.001p-1f, 0x1.555ddep-3f);
----------------
overmighty wrote:
I also tried with a degree-4 minimax polynomial, but it resulted in more exceptional values.
https://github.com/llvm/llvm-project/pull/100632
More information about the libc-commits
mailing list