[libc-commits] [libc] [libc][math] Implement powf function correctly rounded to all rounding modes. (PR #71188)

Mon Nov 6 10:33:35 PST 2023

================
@@ -0,0 +1,141 @@
+//===-- Single-precision 10^x function ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXP10F_IMPL_H
+#define LLVM_LIBC_SRC_MATH_GENERIC_EXP10F_IMPL_H
+
+#include "explogxf.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/math/exp10f.h"
+
+#include <errno.h>
+
+namespace LIBC_NAMESPACE::generic {
+
+LIBC_INLINE float exp10f(float x) {
+  using FPBits = typename fputil::FPBits<float>;
+  FPBits xbits(x);
+
+  uint32_t x_u = xbits.uintval();
+  uint32_t x_abs = x_u & 0x7fff'ffffU;
+
+  // When |x| >= log10(2^128), or x is nan
+  if (LIBC_UNLIKELY(x_abs >= 0x421a'209bU)) {
+    // When x < log10(2^-150) or nan
+    if (x_u > 0xc234'9e35U) {
+      // exp(-Inf) = 0
+      if (xbits.is_inf())
+        return 0.0f;
+      // exp(nan) = nan
+      if (xbits.is_nan())
+        return x;
+      if (fputil::fenv_is_round_up())
+        return static_cast<float>(FPBits(FPBits::MIN_SUBNORMAL));
+      fputil::set_errno_if_required(ERANGE);
+      fputil::raise_except_if_required(FE_UNDERFLOW);
+      return 0.0f;
+    }
+    // x >= log10(2^128) or nan
+    if (!xbits.get_sign() && (x_u >= 0x421a'209bU)) {
+      // x is finite
+      if (x_u < 0x7f80'0000U) {
+        int rounding = fputil::quick_get_round();
+        if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO)
+          return static_cast<float>(FPBits(FPBits::MAX_NORMAL));
+
+        fputil::set_errno_if_required(ERANGE);
----------------
lntue wrote:

With that structure, the errno check-and-set logic would be delegated to the entrypoint implementations, which would insert these extra checks into the critical path (of `exp2f` and `exp10f`) instead of the exceptional-input code path. This would reduce the throughput of `exp2f` and `exp10f` substantially, since their reciprocal throughputs are only a few clock cycles.

So, for performance reasons, we keep the logic in `exp2f` and `exp10f` unmodified, making them act as entrypoint functions without being registered as such; `powf` will delegate to them for special inputs.

https://github.com/llvm/llvm-project/pull/71188