[libc-commits] [libc] [llvm] [libc][math] Impl bfloat16 lgamma function. (PR #199312)

via libc-commits libc-commits at lists.llvm.org
Sat May 23 06:53:06 PDT 2026


================
@@ -0,0 +1,195 @@
+//===-- Implementation of lgammabf16 ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_LGAMMABF16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_LGAMMABF16_H
+
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "src/__support/CPP/bit.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/bfloat16.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace math {
+
+// Evaluate degree-4 polynomial using Horner's method:
+// p(t) = c0 + t*(c1 + t*(c2 + t*(c3 + t*c4)))
+// coeffs = {c4, c3, c2, c1, c0}
+static inline float poly4(float t, const float c[5]) {
+  return c[4] + t * (c[3] + t * (c[2] + t * (c[1] + t * c[0])));
+}
+
+// Compute natural log of positive float x using range reduction.
+// x = 2^e * m, m in [1, 2)
+// ln(x) = e * ln(2) + ln(m)
+// ln(m) approximated by degree-5 poly centered at 1.5
+static inline float lgamma_logf(float x) {
+  // Coefficients for ln(m) on [1,2), centered at 1.5
+  // Generated by numpy.polyfit, max_err = 1.71e-05 (well within bf16 precision)
+  constexpr float LN_COEFFS[6] = {
+      0x1.ef26300000000p-6f,  // c5
+      -0x1.c2e22a0000000p-5f, // c4
+      0x1.929d980000000p-4f,  // c3
+      -0x1.c615ec0000000p-3f, // c2
+      0x1.5557200000000p-1f,  // c1
+      0x1.9f309c0000000p-2f,  // c0
+  };
+  constexpr float LN2 = 0x1.62e430p-1f;
+
+  // Extract exponent and mantissa
+  uint32_t bits = cpp::bit_cast<uint32_t>(x);
+  int e = static_cast<int>((bits >> 23) & 0xFFU) - 127;
+  // Set exponent to 127 (i.e. value in [1,2))
+  bits = (bits & 0x807FFFFFU) | (127U << 23);
+  float m = cpp::bit_cast<float>(bits);
+
+  float t = m - 1.5f;
+  float lnm =
+      LN_COEFFS[5] +
+      t * (LN_COEFFS[4] +
+           t * (LN_COEFFS[3] +
+                t * (LN_COEFFS[2] + t * (LN_COEFFS[1] + t * LN_COEFFS[0]))));
+  return static_cast<float>(e) * LN2 + lnm;
+}
+
+// Coefficients for lgamma on [n, n+1), centered at n+0.5
+// Each row: {c4, c3, c2, c1, c0} for poly in (x - (n+0.5))
+// Generated by numpy.polyfit with degree 4
+// Intervals: n = 1..7
+static constexpr float LGAMMA_POLY[7][5] = {
+    // [1,2), center=1.5, max_err=1.29e-04
+    {0x1.08d8ae0000000p-4f, -0x1.2d373c0000000p-3f, 0x1.de14880000000p-2f,
+     0x1.2f128a0000000p-5f, -0x1.eeb2800000000p-4f},
+    // [2,3), center=2.5, max_err=9.98e-06
+    {0x1.3b3ccc0000000p-7f, -0x1.48c82a0000000p-5f, 0x1.f613220000000p-3f,
+     0x1.6809be0000000p-1f, 0x1.2383fc0000000p-2f},
+    // [3,4), center=3.5, max_err=2.06e-06
+    {0x1.859be80000000p-9f, -0x1.2a28760000000p-6f, 0x1.52475c0000000p-3f,
+     0x1.1a69120000000p+0f, 0x1.3373020000000p+0f},
+    // [4,5), center=4.5, max_err=6.61e-07
+    {0x1.4e023c0000000p-10f, -0x1.51efee0000000p-7f, 0x1.fd62a00000000p-4f,
+     0x1.638d3e0000000p+0f, 0x1.3a140a0000000p+1f},
+    // [5,6), center=5.5, max_err=2.72e-07
+    {0x1.58a5b20000000p-11f, -0x1.b218380000000p-8f, 0x1.9840800000000p-4f,
+     0x1.9c70ae0000000p+0f, 0x1.fa99a60000000p+1f},
+    // [6,7), center=6.5, max_err=1.32e-07
+    {0x1.9096420000000p-12f, -0x1.2e0b0c0000000p-8f, 0x1.548cda0000000p-4f,
+     0x1.cafc460000000p+0f, 0x1.6a676a0000000p+2f},
+    // [7,8), center=7.5, max_err=7.10e-08
+    {0x1.f9d4be0000000p-13f, -0x1.bc58500000000p-9f, 0x1.2413be0000000p-4f,
+     0x1.f25eb80000000p+0f, 0x1.e233060000000p+2f},
+};
+
+// Compute lgamma for x >= 1 using polynomial or Stirling
+static inline float lgamma_positive(float x) {
+  if (x == 1.0f || x == 2.0f)
----------------
lntue wrote:

`LIBC_UNLIKELY`

https://github.com/llvm/llvm-project/pull/199312


More information about the libc-commits mailing list