[libc-commits] [libc] [libc][math][c23] Add log2p1f16 C23 math function (PR #186754)

Thu Mar 26 01:27:16 PDT 2026

================
@@ -0,0 +1,205 @@
+//===-- Implementation header for log2p1f16 ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_LOG2P1F16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_LOG2P1F16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "expxf16_utils.h"
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/macros/properties/cpu_features.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+LIBC_INLINE float16 log2p1f16(float16 x) {
+  using namespace math::expxf16_internal;
+  using FPBits = fputil::FPBits<float16>;
+  FPBits x_bits(x);
+
+  uint16_t x_u = x_bits.uintval();
+  uint16_t x_abs = x_u & 0x7fffU;
+
+  // If x is +-0, NaN, +/-inf, or |x| <= 2^-3.
+  if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x7c00U)) {
+    // log2p1(NaN) = NaN
+    if (x_bits.is_nan()) {
+      if (x_bits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+
+    // log2p1(+/-0) = +/-0
+    if (x_abs == 0U)
+      return x;
+
+    // log2p1(+inf) = +inf
+    if (x_u == 0x7c00U)
+      return FPBits::inf().get_val();
+
+    // log2p1(-inf) = NaN
+    if (x_abs >= 0x7c00U) {
+      fputil::set_errno_if_required(EDOM);
+      fputil::raise_except_if_required(FE_INVALID);
+      return FPBits::quiet_nan().get_val();
+    }
+
+    // When |x| <= 2^-3, use a degree-5 minimax polynomial on x.
+    // For y = 1+x near 1, the table-based range reduction suffers from
+    // catastrophic cancellation (m + log2(f) nearly cancel).  Computing
+    // log2(1+x) directly via polynomial avoids this.
+    //
+    // Generated by Sollya with:
+    //   > display = hexadecimal;
+    //   > Q = fpminimax(log2(1 + x), [|1, 2, 3, 4, 5|], [|SG...|],
+    //                   [-2^-3, 2^-3]);
+    //   > Q;
+    if (x_abs <= 0x3000U) {
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+      constexpr size_t N_LOG2P1F16_EXCEPTS = 11;
+      constexpr fputil::ExceptValues<float16, N_LOG2P1F16_EXCEPTS>
+          LOG2P1F16_EXCEPTS = {{
+              // (input, RZ output, RU offset, RD offset, RN offset)
+              // x = 0x1.ec4p-12
+              {0x0FB1U, 0x118BU, 1U, 0U, 1U},
+              // x = 0x1.c04p-6
+              {0x2701U, 0x28FBU, 1U, 0U, 1U},
+              // x = -0x1.824p-15
+              {0x8309U, 0x8461U, 0U, 1U, 0U},
+              // x = -0x1.414p-10
+              {0x9505U, 0x973EU, 0U, 1U, 1U},
+              // x = -0x1.cb8p-10
+              {0x972EU, 0x992FU, 0U, 1U, 0U},
+              // x = -0x1.99cp-8
+              {0x9E67U, 0xA0A2U, 0U, 1U, 0U},
+              // x = -0x1.ce8p-7
+              {0xA33AU, 0xA540U, 0U, 1U, 0U},
+              // x = -0x1.73cp-6
+              {0xA5CFU, 0xA83DU, 0U, 1U, 0U},
+              // x = -0x1.87p-5
+              {0xAA1CU, 0xAC84U, 0U, 1U, 0U},
+              // x = -0x1.d48p-5
+              {0xAB52U, 0xAD70U, 0U, 1U, 0U},
+              // x = -0x1.da0p-4
+              {0xAF68U, 0xB1ADU, 0U, 1U, 0U},
+          }};
+
+      if (auto r = LOG2P1F16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+        return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+      float xf = x;
+      return fputil::cast<float16>(
+          xf * fputil::polyeval(xf, 0x1.715476p+0f, -0x1.715204p-1f,
+                                0x1.ec6c5p-2f, -0x1.763338p-2f,
+                                0x1.2bcd3cp-2f));
+    }
+  }
+
+  // log2p1(-1) = -inf
+  if (LIBC_UNLIKELY(x_u == 0xbc00U)) {
+    fputil::raise_except_if_required(FE_DIVBYZERO);
+    return FPBits::inf(Sign::NEG).get_val();
+  }
+
+  // log2p1(x) = NaN for x < -1
+  if (LIBC_UNLIKELY(x_u > 0xbc00U)) {
+    fputil::set_errno_if_required(EDOM);
+    fputil::raise_except_if_required(FE_INVALID);
+    return FPBits::quiet_nan().get_val();
+  }
+
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+  constexpr size_t N_LOG2P1F16_EXCEPTS_HI = 2;
+#else
+  constexpr size_t N_LOG2P1F16_EXCEPTS_HI = 6;
+#endif
+  constexpr fputil::ExceptValues<float16, N_LOG2P1F16_EXCEPTS_HI>
+      LOG2P1F16_EXCEPTS_HI = {{
+  // (input, RZ output, RU offset, RD offset, RN offset)
+#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+          // x = 0x1.12p-3
+          {0x3048U, 0x31CBU, 1U, 0U, 1U},
+          // x = 0x1.598p-2
+          {0x3566U, 0x36B5U, 1U, 0U, 1U},
+#endif
+          // x = 0x1.23cp-3
+          {0x308FU, 0x3226U, 1U, 0U, 0U},
+          // x = 0x1.accp+0
+          {0x3EB3U, 0x3DADU, 1U, 0U, 0U},
+#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
+          // x = -0x1.534p-2
+          {0xB54DU, 0xB8A5U, 0U, 1U, 0U},
+          // x = -0x1.bb8p-2
+          {0xB6EEU, 0xBA8DU, 0U, 1U, 0U},
+#endif
+      }};
+
+  if (auto r = LOG2P1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
+    return r.value();
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
+  // For the range reduction, we compute y = 1 + x in float.  For |x| > 2^-3,
+  // the addition 1.0f + xf is exact in float, and the result y is bounded away
+  // from 1.0, avoiding catastrophic cancellation in the table decomposition.
+  float xf = x;
+  float y = 1.0f + xf;
+
+  using FPBitsFloat = fputil::FPBits<float>;
+  FPBitsFloat y_bits(y);
+
+  // y = 1 + x should stay positive for x > -1, but keep this guard for
+  // completeness in case of unexpected intermediate behavior.
+  if (LIBC_UNLIKELY(y_bits.is_zero()))
+    return FPBits::inf(Sign::NEG).get_val();
+
+  int m = y_bits.get_exponent();
+  y_bits.set_biased_exponent(FPBitsFloat::EXP_BIAS);
+  float mant_f = y_bits.get_val();
+
+  // Leading 23 - 5 = 18, so the top 5 mantissa bits give the index in [0, 31].
+  int f = y_bits.get_mantissa() >> (FPBitsFloat::FRACTION_LEN - 5);
+
+  // v = 1.mant * 1/f - 1 = d/f
+  float v = fputil::multiply_add(mant_f, ONE_OVER_F_F[f], -1.0f);
+
+  // Degree-3 minimax polynomial generated by Sollya with the following
+  // commands:
+  //   > display = hexadecimal;
+  //   > P = fpminimax(log2(1 + x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
+  //   > x * P;
----------------
shikharish wrote:

Using
  dirtyinfnorm((log2(1 + x)/x - P)/(log2(1 + x)/x), [-2^-5, 2^-5])

degree 4 gives
  0x1.dcb0e4ac2a1c66fdab9a4e8b55acfb5b3b5fabcbp-26
  which is < 2^-25,

and degree 5 gives
  0x1.caf6542bd83113415593afe5629b67db08a6c5b18p-27
  which is < 2^-26.

For the exhaustive raw-path sweep over the high path, the hard cases do not improve: on FMA the count stays at 2, and on no-FMA it actually goes from 6 to 7 for both higher-degree variants.


https://github.com/llvm/llvm-project/pull/186754