[libc-commits] [libc] [llvm] [libc][math] Implement half precision lgamma function (PR #192834)

Sun Apr 19 00:14:38 PDT 2026

https://github.com/AnonMiraj created https://github.com/llvm/llvm-project/pull/192834

The implementation uses three distinct paths based on $|x|$:

- Small ($|x| < 0.66$): Fits a degree-12 Chebyshev for $h(t) = \frac{\text{lgamma}(t) + \log(t)}{t}$, recovering $\text{lgamma}(t) = t \cdot h - \log(t)$.

- Medium ($|x| \in [0.66, 3.37]$): Fits a degree-20 Chebyshev for $g(t) = \frac{\text{lgamma}(t)}{(t-1)(t-2)}$ 
- Stirling ($|x| > 3.37$): Uses $(x-0.5) \cdot \log(x) - x + \frac{\log(2\pi)}{2}$ plus 1/2/4/8-term Bernoulli corrections by sub-range.


Special cases: NaN, +/-Inf -> +Inf, +/-0 pole, negative integer pole,
lgamma(1) = lgamma(2) = 0 exactly, overflow for large positive x.

Exhaustive tests pass in all 4 rounding modes.

-- CORE-MATH reciprocal throughput --
Ntrial = 20 ; Min = 345.634 + 130.772 clc/call; Median-Min = 95.922 clc/call; Max = 649.220 clc/call
-- System LIBC reciprocal throughput --
Ntrial = 20 ; Min = 372.717 + 117.909 clc/call; Median-Min = 103.454 clc/call; Max = 620.330 clc/call

>From c373803b1e79887ed4ca9e55cd196b75b1e3fdf5 Mon Sep 17 00:00:00 2001
From: Anonmiraj <ezzibrahimx at gmail.com>
Date: Sun, 19 Apr 2026 08:32:58 +0200
Subject: [PATCH] [libc][math] Implement half precision lgamma function

---
 libc/config/baremetal/aarch64/entrypoints.txt |   1 +
 libc/config/baremetal/arm/entrypoints.txt     |   1 +
 libc/config/baremetal/riscv/entrypoints.txt   |   1 +
 libc/config/linux/aarch64/entrypoints.txt     |   1 +
 libc/config/linux/riscv/entrypoints.txt       |   1 +
 libc/config/linux/x86_64/entrypoints.txt      |   1 +
 libc/include/math.yaml                        |   7 +
 libc/shared/math.h                            |   1 +
 libc/shared/math/lgammaf16.h                  |  27 ++
 libc/src/__support/math/CMakeLists.txt        |  18 ++
 libc/src/__support/math/lgammaf16.h           | 292 ++++++++++++++++++
 libc/src/math/CMakeLists.txt                  |   2 +
 libc/src/math/generic/CMakeLists.txt          |  13 +
 libc/src/math/generic/lgammaf16.cpp           |  20 ++
 libc/src/math/lgammaf16.h                     |  21 ++
 libc/test/shared/CMakeLists.txt               |   1 +
 libc/test/shared/shared_math_test.cpp         |   1 +
 libc/test/src/math/CMakeLists.txt             |  11 +
 libc/test/src/math/lgammaf16_test.cpp         |  42 +++
 libc/test/src/math/smoke/CMakeLists.txt       |  11 +
 libc/test/src/math/smoke/lgammaf16_test.cpp   |  68 ++++
 libc/utils/MPFRWrapper/MPCommon.cpp           |   7 +
 libc/utils/MPFRWrapper/MPCommon.h             |   1 +
 libc/utils/MPFRWrapper/MPFRUtils.cpp          |   2 +
 libc/utils/MPFRWrapper/MPFRUtils.h            |   1 +
 .../llvm-project-overlay/libc/BUILD.bazel     |  22 ++
 26 files changed, 574 insertions(+)
 create mode 100644 libc/shared/math/lgammaf16.h
 create mode 100644 libc/src/__support/math/lgammaf16.h
 create mode 100644 libc/src/math/generic/lgammaf16.cpp
 create mode 100644 libc/src/math/lgammaf16.h
 create mode 100644 libc/test/src/math/lgammaf16_test.cpp
 create mode 100644 libc/test/src/math/smoke/lgammaf16_test.cpp

diff --git a/libc/config/baremetal/aarch64/entrypoints.txt b/libc/config/baremetal/aarch64/entrypoints.txt
index 452abd985b3a5..ec363eed4674f 100644
--- a/libc/config/baremetal/aarch64/entrypoints.txt
+++ b/libc/config/baremetal/aarch64/entrypoints.txt
@@ -655,6 +655,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.iscanonicalf16
     libc.src.math.issignalingf16
     libc.src.math.ldexpf16
+    libc.src.math.lgammaf16
     libc.src.math.llogbf16
     libc.src.math.llrintf16
     libc.src.math.llroundf16
diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt
index 9c0e71b2beec0..bceb9c361a899 100644
--- a/libc/config/baremetal/arm/entrypoints.txt
+++ b/libc/config/baremetal/arm/entrypoints.txt
@@ -665,6 +665,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.iscanonicalf16
     libc.src.math.issignalingf16
     libc.src.math.ldexpf16
+    libc.src.math.lgammaf16
     libc.src.math.llogbf16
     libc.src.math.llrintf16
     libc.src.math.llroundf16
diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt
index 5c88bc4ae1fb7..6a039ddbce4de 100644
--- a/libc/config/baremetal/riscv/entrypoints.txt
+++ b/libc/config/baremetal/riscv/entrypoints.txt
@@ -661,6 +661,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.iscanonicalf16
     libc.src.math.issignalingf16
     libc.src.math.ldexpf16
+    libc.src.math.lgammaf16
     libc.src.math.llogbf16
     libc.src.math.llrintf16
     libc.src.math.llroundf16
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 6f7567cf3789f..5fb18be6a65d1 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -566,6 +566,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.ldexp
     libc.src.math.ldexpf
     libc.src.math.ldexpl
+    libc.src.math.lgammaf16
     libc.src.math.llogb
     libc.src.math.llogbf
     libc.src.math.llogbl
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index 1856d8edba7de..0d70238c8d0c6 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -575,6 +575,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.ldexp
     libc.src.math.ldexpf
     libc.src.math.ldexpl
+    libc.src.math.lgammaf16
     libc.src.math.llogb
     libc.src.math.llogbf
     libc.src.math.llogbl
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index fe5991c695e25..9186f0a1e3edc 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -625,6 +625,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.ldexp
     libc.src.math.ldexpf
     libc.src.math.ldexpl
+    libc.src.math.lgammaf16
     libc.src.math.llogb
     libc.src.math.llogbf
     libc.src.math.llogbl
diff --git a/libc/include/math.yaml b/libc/include/math.yaml
index 3984a665b234b..67d19399b99ae 100644
--- a/libc/include/math.yaml
+++ b/libc/include/math.yaml
@@ -1682,6 +1682,13 @@ functions:
     arguments:
       - type: long double
       - type: int *
+  - name: lgammaf16
+    standards:
+      - stdc
+    return_type: _Float16
+    arguments:
+      - type: _Float16
+    guard: LIBC_TYPES_HAS_FLOAT16
   - name: llogb
     standards:
       - stdc
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 83cd6196b146b..388f07249b974 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -185,6 +185,7 @@
 #include "math/ldexpf.h"
 #include "math/ldexpf128.h"
 #include "math/ldexpf16.h"
+#include "math/lgammaf16.h"
 #include "math/llogb.h"
 #include "math/llogbf.h"
 #include "math/llogbf128.h"
diff --git a/libc/shared/math/lgammaf16.h b/libc/shared/math/lgammaf16.h
new file mode 100644
index 0000000000000..4ae8d36e5ba53
--- /dev/null
+++ b/libc/shared/math/lgammaf16.h
@@ -0,0 +1,27 @@
+//===-- Shared lgammaf16 function -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_LGAMMAF16_H
+#define LLVM_LIBC_SHARED_MATH_LGAMMAF16_H
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "shared/libc_common.h"
+#include "src/__support/math/lgammaf16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::lgammaf16;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SHARED_MATH_LGAMMAF16_H
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index d1c42afbaafe1..1055110673871 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -1861,6 +1861,24 @@ add_header_library(
     libc.include.llvm-libc-types.float128
 )
 
+add_header_library(
+  lgammaf16
+  HDRS
+    lgammaf16.h
+  DEPENDS
+    libc.include.llvm-libc-macros.float16_macros
+    libc.src.__support.CPP.bit
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.nearest_integer_operations
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.macros.config
+    libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.types
+)
+
 add_header_library(
   llogbf16
   HDRS
diff --git a/libc/src/__support/math/lgammaf16.h b/libc/src/__support/math/lgammaf16.h
new file mode 100644
index 0000000000000..92449fd63aecf
--- /dev/null
+++ b/libc/src/__support/math/lgammaf16.h
@@ -0,0 +1,292 @@
+//===-- Implementation header for lgammaf16 ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_LGAMMAF16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_LGAMMAF16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/CPP/bit.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/NearestIntegerOperations.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+namespace lgammaf16_internal {
+
+LIBC_INLINE constexpr bool is_integer(float16 x) {
+  using FPBits = fputil::FPBits<float16>;
+  FPBits xbits(x);
+  uint16_t x_u = xbits.uintval();
+  unsigned x_e = static_cast<unsigned>(xbits.get_biased_exponent());
+  unsigned lsb = static_cast<unsigned>(
+      cpp::countr_zero(static_cast<uint16_t>(x_u | FPBits::EXP_MASK)));
+  constexpr unsigned UNIT_EXPONENT =
+      static_cast<unsigned>(FPBits::EXP_BIAS + FPBits::FRACTION_LEN);
+  return x_e + lsb >= UNIT_EXPONENT;
+}
+
+// sin(pi*x) for x in (0, 1), written as (0.25 - u^2) * P(u^2) where u = x-0.5.
+// Coefficients for P are a degree-7 polynomial in u^2
+// approximating sin(pi*(u+0.5)) / (pi*(0.25-u^2))
+// with max error ~2^{-55}, generated by Sollya with:
+//   > P = fpminimax(sin(pi*(x+0.5))/(pi*(0.25-x^2)),
+//                   [|0,2,4,6,8,10,12,14|], [|D...|], [-0.5, 0.5]);
+LIBC_INLINE constexpr double lg_sinpi(double x) {
+  constexpr double COEFFS[8] = {0x1p+2,
+                                -0x1.de9e64df22ea4p+1,
+                                0x1.472be122401f8p+0,
+                                -0x1.d4fcd82df91bp-3,
+                                0x1.9f05c97e0aab2p-6,
+                                -0x1.f3091c427b611p-10,
+                                0x1.b22c9bfdca547p-14,
+                                -0x1.15484325ef569p-18};
+  double u = x - 0.5;
+  double u2 = u * u, u4 = u2 * u2, u8 = u4 * u4;
+  double p01 = fputil::multiply_add(u2, COEFFS[1], COEFFS[0]);
+  double p23 = fputil::multiply_add(u2, COEFFS[3], COEFFS[2]);
+  double p45 = fputil::multiply_add(u2, COEFFS[5], COEFFS[4]);
+  double p67 = fputil::multiply_add(u2, COEFFS[7], COEFFS[6]);
+  double p03 = fputil::multiply_add(u4, p23, p01);
+  double p47 = fputil::multiply_add(u4, p67, p45);
+  return (0.25 - u2) * fputil::multiply_add(u8, p47, p03);
+}
+
+// Natural logarithm of x (x > 0), using 16-entry table + degree-7 polynomial.
+// Range reduction: x = 2^e * m where m in [1, 2), decomposed as
+// m = (1+i/16)*(1+z) so log(x) = e*log(2) + log(1+i/16) + log(1+z)
+// = e*log(2) + IL[i] + z*P(z).
+// P approximates log(1+z)/z on z in [-1/16, 1/16] with max
+// error ~2^{-54}, generated by Sollya with:
+//   > P = fpminimax(log(1+x)/x, [|0,1,2,3,4,5,6,7|], [|D...|], [-1/16, 1/16]);
+LIBC_INLINE double lg_ln(double x) {
+  using FPBits = fputil::FPBits<double>;
+  uint64_t u = FPBits(x).uintval();
+  int e = static_cast<int>(FPBits(x).get_biased_exponent()) - 0x3ff;
+
+  // Coefficients for log(1 + z)/z on z in [-1/16, 1/16]
+  constexpr double COEFFS[8] = {0x1.fffffffffff24p-1, -0x1.ffffffffd1d67p-2,
+                                0x1.55555537802dep-2, -0x1.ffffeca81b866p-3,
+                                0x1.999611761d772p-3, -0x1.54f3e581b61bfp-3,
+                                0x1.1e642b4cb5143p-3, -0x1.9115a5af1e1edp-4};
+  // IL[i] = log(1 + i/16) for i = 0..15
+  constexpr double IL[16] = {
+      0x1.59caeec280116p-57, 0x1.f0a30c01162aap-5, 0x1.e27076e2af2ebp-4,
+      0x1.5ff3070a793d6p-3,  0x1.c8ff7c79a9a2p-3,  0x1.1675cababa60fp-2,
+      0x1.4618bc21c5ec2p-2,  0x1.739d7f6bbd007p-2, 0x1.9f323ecbf984dp-2,
+      0x1.c8ff7c79a9a21p-2,  0x1.f128f5faf06ecp-2, 0x1.0be72e4252a83p-1,
+      0x1.1e85f5e7040d1p-1,  0x1.307d7334f10bep-1, 0x1.41d8fe84672afp-1,
+      0x1.52a2d265bc5abp-1};
+  // IX[i] = 1 / (1 + i/16) for i = 0..15
+  constexpr double IX[16] = {
+      0x000000000000001p+0, 0x1.e1e1e1e1e1e1ep-1, 0x1.c71c71c71c71cp-1,
+      0x1.af286bca1af28p-1, 0x1.999999999999ap-1, 0x1.8618618618618p-1,
+      0x1.745d1745d1746p-1, 0x1.642c8590b2164p-1, 0x1.5555555555555p-1,
+      0x1.47ae147ae147bp-1, 0x1.3b13b13b13b14p-1, 0x1.2f684bda12f68p-1,
+      0x1.2492492492492p-1, 0x1.1a7b9611a7b96p-1, 0x1.1111111111111p-1,
+      0x1.0842108421084p-1};
+
+  int i = static_cast<int>((u >> 48) & 0xf);
+  // Reduce to mantissa in [1, 2)
+  uint64_t mant_u = (u & (~uint64_t(0) >> 12)) | (uint64_t(0x3ff) << 52);
+  double mant = FPBits(mant_u).get_val();
+  double z = IX[i] * mant - 1.0, z2 = z * z, z4 = z2 * z2;
+  double q01 = fputil::multiply_add(z, COEFFS[1], COEFFS[0]);
+  double q23 = fputil::multiply_add(z, COEFFS[3], COEFFS[2]);
+  double q45 = fputil::multiply_add(z, COEFFS[5], COEFFS[4]);
+  double q67 = fputil::multiply_add(z, COEFFS[7], COEFFS[6]);
+  double q03 = fputil::multiply_add(z2, q23, q01);
+  double q47 = fputil::multiply_add(z2, q67, q45);
+  return e * 0x1.62e42fefa39efp-1 + IL[i] +
+         z * fputil::multiply_add(z4, q47, q03);
+}
+
+} // namespace lgammaf16_internal
+
+LIBC_INLINE float16 lgammaf16(float16 x) {
+  using namespace lgammaf16_internal;
+  using FPBits = fputil::FPBits<float16>;
+
+  FPBits xbits(x);
+  uint16_t x_abs = xbits.abs().uintval();
+
+  // Handle NaN and Inf
+  if (LIBC_UNLIKELY(x_abs >= 0x7c00u)) {
+    if (x_abs == 0x7c00u)
+      return FPBits::inf().get_val();
+    if (xbits.is_signaling_nan()) {
+      fputil::raise_except_if_required(FE_INVALID);
+      return FPBits::quiet_nan().get_val();
+    }
+    return x;
+  }
+
+  // +-0 pole error
+  if (LIBC_UNLIKELY(x_abs == 0)) {
+    fputil::raise_except_if_required(FE_DIVBYZERO);
+    fputil::set_errno_if_required(ERANGE);
+    return FPBits::inf().get_val();
+  }
+
+  if (LIBC_UNLIKELY(is_integer(x))) {
+    if (xbits.is_neg()) {
+      // pole -> +Inf
+      fputil::raise_except_if_required(FE_DIVBYZERO);
+      fputil::set_errno_if_required(ERANGE);
+      return FPBits::inf().get_val();
+    }
+    // lgamma(1) = lgamma(2) = 0
+    if (x_abs == 0x3c00u || x_abs == 0x4000u)
+      return FPBits::zero().get_val();
+  }
+
+  double xd = fputil::cast<double>(x);
+  double abs_xd = xd < 0.0 ? -xd : xd;
+  double lgamma_val;
+
+  if (LIBC_UNLIKELY(x_abs < 0x3948u)) {
+    // Small path: |x| < 0.66015625
+    // For t = |x|: lgamma(t) = t*P(u) - log(t), where P(u) approximates
+    //   h(t) = (lgamma(t) + log(t)) / t,  u = (t - MID_S) / HW_S in [-1, 1]
+    // For x < 0: lgamma(-t) = log(pi) - log(sin(pi*t)) - lgamma(t) - log(t)
+    //          = log(pi) - lg_ln(lg_sinpi(t)) - t*P(u)
+    // Degree-12 Chebyshev approximation for h in u; max lgamma error ~1.5e-13
+    // Coefficients generated by DCT at 13 Chebyshev nodes on
+    // [1e-5, 0.66015625].
+    // Evaluated via Clenshaw: sum_{k=0}^{12} CHEB_H[k] * T_k(u)
+    constexpr double MID_S = 0x1.52014f8b588e3p-2;
+    constexpr double HW_S = 0x1.51feb074a771dp-2;
+    constexpr double CHEB_H[13] = {
+        -0x1.6ab0351d4e02fp-2,  0x1.ac55f136e49cep-3,   -0x1.9fc5280cd41bfp-7,
+        0x1.1825d37c81c1ep-10,  -0x1.ae2f107c31c9ep-14, 0x1.6174bdf95e9abp-17,
+        -0x1.2e6abd22bc4ecp-20, 0x1.09b66d4f14b06p-23,  -0x1.dbb938f189d8ap-27,
+        0x1.afcb748e20e4ep-30,  -0x1.8c221a9d89d8ap-33, 0x1.6e708e49b3aeep-36,
+        -0x1.50accec4ec4ecp-39};
+    // Clenshaw's algorithm: p = sum_k CHEB_H[k] * T_k(u_s)
+    double u_s = (abs_xd - MID_S) / HW_S, u_s2 = 2.0 * u_s;
+    double bh1 = CHEB_H[12], bh2 = 0.0;
+    for (int k = 11; k >= 1; k--) {
+      double bh0 = fputil::multiply_add(u_s2, bh1, CHEB_H[k] - bh2);
+      bh2 = bh1;
+      bh1 = bh0;
+    }
+    double poly_h = fputil::multiply_add(u_s, bh1, CHEB_H[0] - bh2);
+    // abs_xd * poly_h = lgamma(abs_xd) + log(abs_xd)
+    double poly_val = abs_xd * poly_h;
+    if (xbits.is_neg())
+      lgamma_val = 0x1.250d048e7a1bdp+0 - lg_ln(lg_sinpi(abs_xd)) - poly_val;
+    else
+      lgamma_val = poly_val - lg_ln(abs_xd);
+  } else if (x_abs > 0x42BEu) {
+    double log_abs_xd = lg_ln(abs_xd);
+    // lgamma(x) = (x-0.5)*log(x) - x + log(2*pi)/2 + Bernoulli corrections
+    //  Stirling base: (x-0.5)*(log(x)-1) + (log(2*pi)-1)/2
+    lgamma_val = (abs_xd - 0.5) * (log_abs_xd - 1.0) + 0x1.acfe390c97d69p-2;
+
+    double inv_x = 1.0 / abs_xd, inv_x2 = inv_x * inv_x;
+
+    if (x_abs > 0x64AEu) {
+      // |x| > 1198  1-term Stirling-Bernoulli: 1/(12*x)
+      lgamma_val += inv_x * 0x1.5555555555555p-4;
+    } else if (x_abs > 0x549Eu) {
+      // |x| in (73.875, 1198] 2-term correction
+      // Minimax polynomial in 1/x^2 for R(x) = lgamma(x) - stirling_base(x),
+      // where stirling_base(x) = (x-0.5)*log(x) - x + log(2*pi)/2
+      // Exact Bernoulli: B_2/(1*2) = 1/12, B_4/(3*4) = -1/360
+      // Generated by Sollya with:
+      //   > stir = proc(x) { return lgamma(x)-((x-0.5)*log(x)-x+log(2*pi)/2);
+      //   }; > P = fpminimax(stir(x), [|1,3|], [|D...|], [73.875, 1198]);
+      constexpr double BERN2[2] = {0x1.555555547fbadp-4, -0x1.6c0fd270c465p-9};
+      lgamma_val += inv_x * fputil::polyeval(inv_x2, BERN2[0], BERN2[1]);
+    } else if (x_abs > 0x4955u) {
+      // |x| in (10.664, 73.875]  4-term correction
+      // Minimax polynomial in 1/x^2 for R(x); exact Bernoulli terms are
+      // 1/12, -1/360, 1/1260, -1/1680
+      // Generated by Sollya with:
+      //   > display = hexadecimal;
+      //   > stir = proc(x) { return lgamma(x)-((x-0.5)*log(x)-x+log(2*pi)/2);
+      //   }; > P = fpminimax(stir(x), [|1,3,5,7|], [|D...|], [10.664, 73.875]);
+      constexpr double BERN4[4] = {0x1.555555554de0bp-4, -0x1.6c16bdc45944fp-9,
+                                   0x1.a0077f300ecb3p-11,
+                                   -0x1.2e9cfff3b29c2p-11};
+      double inv_x4 = inv_x2 * inv_x2;
+      lgamma_val +=
+          inv_x * fputil::multiply_add(
+                      inv_x4, fputil::multiply_add(inv_x2, BERN4[3], BERN4[2]),
+                      fputil::multiply_add(inv_x2, BERN4[1], BERN4[0]));
+    }
+
+    if (xbits.is_neg()) {
+      // Reflection formula
+      // lgamma(x) = log(pi) - log|x| - log|sin(pi*frac_x)| - lgamma|x|
+      double frac_x = xd - fputil::floor(xd);
+      lgamma_val = 0x1.250d048e7a1bdp+0 - lgamma_val - log_abs_xd;
+      lgamma_val -= lg_ln(lg_sinpi(frac_x));
+    }
+  } else {
+    // Medium path: |x| in [0.66015625, 3.373046875]
+    // lgamma(|x|) = (|x|-1)*(|x|-2) * P(u), where P(u) approximates
+    //   g(t) = lgamma(t) / ((t-1)*(t-2)),  u = (t - MID_M) / HW_M in [-1, 1]
+    // Degree-20 Chebyshev approximation for g in u; max lgamma error ~1.4e-10
+    // Coefficients generated by DCT at 21 Chebyshev nodes on
+    // [0.66015625, 3.373046875]. Evaluated via Clenshaw: sum_{k=0}^{20}
+    // CHEB_G[k] * T_k(u)
+    constexpr double MID_M = 0x1.0220000000000p+1;
+    constexpr double HW_M = 0x1.5b40000000000p+0;
+    constexpr double CHEB_G[21] = {
+        0x1.d690d4de8b355p-2,   -0x1.53439726f7d8bp-3,  0x1.568fc6653c026p-5,
+        -0x1.8d074f73f36efp-7,  0x1.eefebf4ca8e86p-9,   -0x1.4275913d85ff5p-10,
+        0x1.b075f0d96e492p-12,  -0x1.27ed35f357339p-13, 0x1.9b12cbe71a800p-15,
+        -0x1.20c949c2829b5p-16, 0x1.996eb95241555p-18,  -0x1.2460d6c60f800p-19,
+        0x1.a42297aae1862p-21,  -0x1.2f6c7d28230a5p-22, 0x1.b8394aac30c31p-24,
+        -0x1.409726ce1110ep-25, 0x1.d484ac42db6dbp-27,  -0x1.574dae55861abp-28,
+        0x1.f782605555555p-30,  -0x1.6cefb8f65aa2ep-31, 0x1.d9f268f3cf3cfp-33};
+    // Clenshaw's algorithm: poly_g = sum_k CHEB_G[k] * T_k(u_m)
+    double u_m = (abs_xd - MID_M) / HW_M, u_m2 = 2.0 * u_m;
+    double bg1 = CHEB_G[20], bg2 = 0.0;
+    for (int k = 19; k >= 1; k--) {
+      double bg0 = fputil::multiply_add(u_m2, bg1, CHEB_G[k] - bg2);
+      bg2 = bg1;
+      bg1 = bg0;
+    }
+    double poly_g = fputil::multiply_add(u_m, bg1, CHEB_G[0] - bg2);
+    lgamma_val = (abs_xd - 1.0) * (abs_xd - 2.0) * poly_g;
+
+    if (xbits.is_neg()) {
+      // Reflection
+      // (x) = log(pi) - lgamma|x| - log(|x|*|sin(pi*frac_x)|)
+      double frac_x = xd - fputil::floor(xd);
+      lgamma_val = 0x1.250d048e7a1bdp+0 - lgamma_val;
+      lgamma_val -= lg_ln(lg_sinpi(frac_x) * abs_xd);
+    }
+  }
+
+  float16 result = fputil::cast<float16>(lgamma_val);
+  if (LIBC_UNLIKELY(fputil::FPBits<float16>(result).is_inf()))
+    fputil::set_errno_if_required(ERANGE);
+  return result;
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_LGAMMAF16_H
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index b53817e2a1729..8d2a981679980 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -376,6 +376,8 @@ add_math_entrypoint_object(issignalingf16)
 add_math_entrypoint_object(issignalingf128)
 add_math_entrypoint_object(issignalingbf16)
 
+add_math_entrypoint_object(lgammaf16)
+
 add_math_entrypoint_object(llogb)
 add_math_entrypoint_object(llogbf)
 add_math_entrypoint_object(llogbl)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 740b78418492b..1a6c9c76d36c2 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1681,6 +1681,19 @@ add_entrypoint_object(
     libc.src.__support.macros.properties.types
 )
 
+add_entrypoint_object(
+  lgammaf16
+  SRCS
+    lgammaf16.cpp
+  HDRS
+    ../lgammaf16.h
+  DEPENDS
+    libc.src.__support.common
+    libc.src.__support.macros.config
+    libc.src.__support.math.lgammaf16
+    libc.src.errno.errno
+)
+
 add_entrypoint_object(
   llogb
   SRCS
diff --git a/libc/src/math/generic/lgammaf16.cpp b/libc/src/math/generic/lgammaf16.cpp
new file mode 100644
index 0000000000000..91b14aea3b01f
--- /dev/null
+++ b/libc/src/math/generic/lgammaf16.cpp
@@ -0,0 +1,20 @@
+//===-- Implementation of lgammaf16 function ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/lgammaf16.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/math/lgammaf16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(float16, lgammaf16, (float16 x)) {
+  return math::lgammaf16(x);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/lgammaf16.h b/libc/src/math/lgammaf16.h
new file mode 100644
index 0000000000000..6d233848ca1ea
--- /dev/null
+++ b/libc/src/math/lgammaf16.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for lgammaf16 ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_LGAMMAF16_H
+#define LLVM_LIBC_SRC_MATH_LGAMMAF16_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+float16 lgammaf16(float16 x);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_LGAMMAF16_H
diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt
index 3289321f25dd4..6f379e3c9071c 100644
--- a/libc/test/shared/CMakeLists.txt
+++ b/libc/test/shared/CMakeLists.txt
@@ -201,6 +201,7 @@ add_fp_unittest(
     libc.src.__support.math.ldexpf
     libc.src.__support.math.ldexpf128
     libc.src.__support.math.ldexpf16
+    libc.src.__support.math.lgammaf16
     libc.src.__support.math.llogbf
     libc.src.__support.math.llogbf128
     libc.src.__support.math.llogbf16
diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp
index 5d83ce22af4ab..d6cf972888c9b 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -58,6 +58,7 @@ TEST(LlvmLibcSharedMathTest, AllFloat16) {
   EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::shared::log2f16(2.0f16));
   EXPECT_FP_EQ(1.0f16, LIBC_NAMESPACE::shared::log2p1f16(1.0f16));
   EXPECT_FP_EQ(0.0f16, LIBC_NAMESPACE::shared::logbf16(1.0f16));
+  EXPECT_FP_EQ(0.0f16, LIBC_NAMESPACE::shared::lgammaf16(1.0f16));
   EXPECT_EQ(0L, LIBC_NAMESPACE::shared::llogbf16(1.0f16));
 
   EXPECT_FP_EQ(0x1.921fb6p+0f16, LIBC_NAMESPACE::shared::acosf16(0.0f16));
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 4213c11eca515..3278cf545c501 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -243,6 +243,17 @@ add_fp_unittest(
     libc.src.__support.FPUtil.fp_bits
 )
 
+add_fp_unittest(
+  lgammaf16_test
+  NEED_MPFR
+  SUITE
+    libc-math-unittests
+  SRCS
+    lgammaf16_test.cpp
+  DEPENDS
+    libc.src.math.lgammaf16
+)
+
 add_fp_unittest(
   tanpif16_test
   NEED_MPFR
diff --git a/libc/test/src/math/lgammaf16_test.cpp b/libc/test/src/math/lgammaf16_test.cpp
new file mode 100644
index 0000000000000..5713b6449f276
--- /dev/null
+++ b/libc/test/src/math/lgammaf16_test.cpp
@@ -0,0 +1,42 @@
+//===-- Unittests for lgammaf16 -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/lgammaf16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+using LlvmLibcLgammaf16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+// Range: ]0, +Inf[
+static constexpr uint16_t POS_START = 0x0001U;
+static constexpr uint16_t POS_STOP = 0x7bffU;
+
+// Range: ]-Inf, 0[
+static constexpr uint16_t NEG_START = 0x8001U;
+static constexpr uint16_t NEG_STOP = 0xfbffU;
+
+TEST_F(LlvmLibcLgammaf16Test, PositiveRange) {
+  for (uint16_t v = POS_START; v <= POS_STOP; ++v) {
+    float16 x = FPBits(v).get_val();
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Lgamma, x,
+                                   LIBC_NAMESPACE::lgammaf16(x), 0.5);
+  }
+}
+
+TEST_F(LlvmLibcLgammaf16Test, NegativeRange) {
+  for (uint16_t v = NEG_START; v <= NEG_STOP; ++v) {
+    float16 x = FPBits(v).get_val();
+    if (FPBits(v).is_nan())
+      continue;
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Lgamma, x,
+                                   LIBC_NAMESPACE::lgammaf16(x), 0.5);
+  }
+}
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 28b85b1a25bbd..3871798d00b7f 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -145,6 +145,17 @@ add_fp_unittest(
     libc.src.math.tanpif
 )
 
+add_fp_unittest(
+  lgammaf16_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    lgammaf16_test.cpp
+  DEPENDS
+    libc.hdr.errno_macros
+    libc.src.math.lgammaf16
+)
+
 add_fp_unittest(
   tanpif16_test
   SUITE
diff --git a/libc/test/src/math/smoke/lgammaf16_test.cpp b/libc/test/src/math/smoke/lgammaf16_test.cpp
new file mode 100644
index 0000000000000..fd8913748b6b3
--- /dev/null
+++ b/libc/test/src/math/smoke/lgammaf16_test.cpp
@@ -0,0 +1,68 @@
+//===-- Unittests for lgammaf16 -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/errno_macros.h"
+#include "src/math/lgammaf16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcLgammaf16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+
+TEST_F(LlvmLibcLgammaf16Test, SpecialNumbers) {
+  // sNaN → qNaN + FE_INVALID
+  EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::lgammaf16(sNaN),
+                              FE_INVALID);
+  EXPECT_MATH_ERRNO(0);
+
+  // qNaN → qNaN
+  EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::lgammaf16(aNaN));
+
+  // +Inf → +Inf
+  EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::lgammaf16(inf));
+
+  // -Inf → +Inf
+  EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::lgammaf16(neg_inf));
+
+  // +0 → +Inf + FE_DIVBYZERO (pole)
+  EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::lgammaf16(zero),
+                              FE_DIVBYZERO);
+  EXPECT_MATH_ERRNO(ERANGE);
+
+  // -0 → +Inf + FE_DIVBYZERO (pole)
+  EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::lgammaf16(neg_zero),
+                              FE_DIVBYZERO);
+  EXPECT_MATH_ERRNO(ERANGE);
+}
+
+TEST_F(LlvmLibcLgammaf16Test, NegativeIntegers) {
+  // lgamma of negative integer → +Inf + FE_DIVBYZERO (pole)
+  EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::lgammaf16(-1.0f16),
+                              FE_DIVBYZERO);
+  EXPECT_MATH_ERRNO(ERANGE);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::lgammaf16(-2.0f16),
+                              FE_DIVBYZERO);
+  EXPECT_MATH_ERRNO(ERANGE);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::lgammaf16(-3.0f16),
+                              FE_DIVBYZERO);
+  EXPECT_MATH_ERRNO(ERANGE);
+}
+
+TEST_F(LlvmLibcLgammaf16Test, ExactValues) {
+  EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::lgammaf16(1.0f16));
+  EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::lgammaf16(2.0f16));
+}
+
+TEST_F(LlvmLibcLgammaf16Test, Overflow) {
+  // lgamma(8192) ≈ 65623 > 65520 (the float16 overflow midpoint), so it
+  // overflows to +Inf in round-to-nearest mode.
+  EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::lgammaf16(8192.0f16),
+                              FE_OVERFLOW | FE_INEXACT);
+  EXPECT_MATH_ERRNO(ERANGE);
+}
diff --git a/libc/utils/MPFRWrapper/MPCommon.cpp b/libc/utils/MPFRWrapper/MPCommon.cpp
index bdd8286148641..af69bd1421f95 100644
--- a/libc/utils/MPFRWrapper/MPCommon.cpp
+++ b/libc/utils/MPFRWrapper/MPCommon.cpp
@@ -323,6 +323,13 @@ MPFRNumber MPFRNumber::hypot(const MPFRNumber &b) {
   return result;
 }
 
+MPFRNumber MPFRNumber::lgamma() const {
+  MPFRNumber result(*this);
+  int signp;
+  mpfr_lgamma(result.value, &signp, value, mpfr_rounding);
+  return result;
+}
+
 MPFRNumber MPFRNumber::log() const {
   MPFRNumber result(*this);
   mpfr_log(result.value, value, mpfr_rounding);
diff --git a/libc/utils/MPFRWrapper/MPCommon.h b/libc/utils/MPFRWrapper/MPCommon.h
index 935a2614968a2..38fd15fcc956c 100644
--- a/libc/utils/MPFRWrapper/MPCommon.h
+++ b/libc/utils/MPFRWrapper/MPCommon.h
@@ -212,6 +212,7 @@ class MPFRNumber {
   MPFRNumber fmod(const MPFRNumber &b);
   MPFRNumber frexp(int &exp);
   MPFRNumber hypot(const MPFRNumber &b);
+  MPFRNumber lgamma() const;
   MPFRNumber log() const;
   MPFRNumber log2() const;
   MPFRNumber log2p1() const;
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index 356764302bda8..d585baa2e0d2c 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -75,6 +75,8 @@ unary_operation(Operation op, InputType input, unsigned int precision,
     return mpfrInput.expm1();
   case Operation::Floor:
     return mpfrInput.floor();
+  case Operation::Lgamma:
+    return mpfrInput.lgamma();
   case Operation::Log:
     return mpfrInput.log();
   case Operation::Log2:
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index 6c1b15598b0f2..d490433685bcf 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -48,6 +48,7 @@ enum class Operation : int {
   Exp10m1,
   Expm1,
   Floor,
+  Lgamma,
   Log,
   Log2,
   Log2p1,
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index eb4a97b457ca8..1daedb664368e 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -5168,6 +5168,23 @@ libc_support_library(
     ],
 )
 
+libc_support_library(
+    name = "__support_math_lgammaf16",
+    hdrs = ["src/__support/math/lgammaf16.h"],
+    deps = [
+        ":__support_cpp_bit",
+        ":__support_fputil_cast",
+        ":__support_fputil_fenv_impl",
+        ":__support_fputil_fp_bits",
+        ":__support_fputil_multiply_add",
+        ":__support_fputil_nearest_integer_operations",
+        ":__support_macros_config",
+        ":__support_macros_optimization",
+        ":__support_macros_properties_types",
+        ":llvm_libc_macros_float16_macros",
+    ],
+)
+
 libc_support_library(
     name = "__support_math_llogbf16",
     hdrs = ["src/__support/math/llogbf16.h"],
@@ -8025,6 +8042,11 @@ libc_math_function(
     ],
 )
 
+libc_math_function(
+    name = "lgammaf16",
+    additional_deps = [":__support_math_lgammaf16"],
+)
+
 libc_math_function(
     name = "llogb",
     additional_deps = [