[libc-commits] [libc] [libc][math][c23] Add f16fma{, l, f128} C23 math function (PR #96711)

Thu Jun 27 10:34:00 PDT 2024

https://github.com/overmighty updated https://github.com/llvm/llvm-project/pull/96711

>From 93dc2d321ac8328625dc2eba4f13054e0dc79002 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Wed, 26 Jun 2024 01:12:15 +0200
Subject: [PATCH 1/8] [libc][math][c23] Add f16fma C23 math function

---
 libc/config/linux/aarch64/entrypoints.txt     |  1 +
 libc/config/linux/x86_64/entrypoints.txt      |  1 +
 libc/docs/math/index.rst                      |  2 +-
 libc/spec/stdc.td                             |  1 +
 libc/src/__support/FPUtil/CMakeLists.txt      |  1 +
 .../__support/FPUtil/generic/CMakeLists.txt   |  1 +
 libc/src/__support/FPUtil/generic/FMA.h       | 87 ++-----------------
 .../FPUtil/generic_hardware/CMakeLists.txt    | 10 +++
 .../__support/FPUtil/generic_hardware/fma.h   | 29 +++++++
 libc/src/__support/FPUtil/multiply_add.h      |  6 +-
 libc/src/math/CMakeLists.txt                  |  1 +
 libc/src/math/f16fma.h                        | 20 +++++
 libc/src/math/generic/CMakeLists.txt          | 13 +++
 libc/src/math/generic/f16fma.cpp              | 19 ++++
 libc/test/src/math/CMakeLists.txt             | 15 ++++
 libc/test/src/math/f16fma_test.cpp            | 21 +++++
 libc/test/src/math/smoke/CMakeLists.txt       | 12 +++
 libc/test/src/math/smoke/f16fma_test.cpp      | 13 +++
 libc/utils/MPFRWrapper/MPFRUtils.cpp          |  6 ++
 19 files changed, 175 insertions(+), 84 deletions(-)
 create mode 100644 libc/src/__support/FPUtil/generic_hardware/CMakeLists.txt
 create mode 100644 libc/src/__support/FPUtil/generic_hardware/fma.h
 create mode 100644 libc/src/math/f16fma.h
 create mode 100644 libc/src/math/generic/f16fma.cpp
 create mode 100644 libc/test/src/math/f16fma_test.cpp
 create mode 100644 libc/test/src/math/smoke/f16fma_test.cpp

diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index a875a17f06b3e..f798bf282bf5d 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -506,6 +506,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.ceilf16
     libc.src.math.copysignf16
     libc.src.math.f16divf
+    libc.src.math.f16fma
     libc.src.math.f16fmaf
     libc.src.math.f16sqrtf
     libc.src.math.fabsf16
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 34748ff5950ad..9d88cf2b60222 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -537,6 +537,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.ceilf16
     libc.src.math.copysignf16
     libc.src.math.f16divf
+    libc.src.math.f16fma
     libc.src.math.f16fmaf
     libc.src.math.f16sqrtf
     libc.src.math.fabsf16
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index 95f450ab75960..30079e8410f19 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -126,7 +126,7 @@ Basic Operations
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | f16div           | |check|          |                 |                        | N/A                  |                        | 7.12.14.4              | F.10.11                    |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| f16fma           | |check|          |                 |                        | N/A                  |                        | 7.12.14.5              | F.10.11                    |
+| f16fma           | |check|          | |check|         |                        | N/A                  |                        | 7.12.14.5              | F.10.11                    |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | fabs             | |check|          | |check|         | |check|                | |check|              | |check|                | 7.12.7.3               | F.10.4.3                   |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 651f49deef4c1..367e6b2887dbe 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -477,6 +477,7 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"fma", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
           FunctionSpec<"fmaf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>, ArgSpec<FloatType>]>,
 
+          GuardedFunctionSpec<"f16fma", RetValSpec<Float16Type>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>, ArgSpec<DoubleType>], "LIBC_TYPES_HAS_FLOAT16">,
           GuardedFunctionSpec<"f16fmaf", RetValSpec<Float16Type>, [ArgSpec<FloatType>, ArgSpec<FloatType>, ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
 
           FunctionSpec<"fmod", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
index 900a7022c3868..0f27b79b059a3 100644
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -227,3 +227,4 @@ add_header_library(
 )
 
 add_subdirectory(generic)
+add_subdirectory(generic_hardware)
diff --git a/libc/src/__support/FPUtil/generic/CMakeLists.txt b/libc/src/__support/FPUtil/generic/CMakeLists.txt
index 33b2564bfa087..80af697903286 100644
--- a/libc/src/__support/FPUtil/generic/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/generic/CMakeLists.txt
@@ -24,6 +24,7 @@ add_header_library(
     libc.src.__support.CPP.bit
     libc.src.__support.CPP.limits
     libc.src.__support.CPP.type_traits
+    libc.src.__support.FPUtil.dyadic_float
     libc.src.__support.FPUtil.fenv_impl
     libc.src.__support.FPUtil.fp_bits
     libc.src.__support.FPUtil.rounding_mode
diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h
index 71b150758d419..40a99fc6ca62e 100644
--- a/libc/src/__support/FPUtil/generic/FMA.h
+++ b/libc/src/__support/FPUtil/generic/FMA.h
@@ -13,6 +13,7 @@
 #include "src/__support/CPP/limits.h"
 #include "src/__support/CPP/type_traits.h"
 #include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/dyadic_float.h"
 #include "src/__support/FPUtil/rounding_mode.h"
 #include "src/__support/big_int.h"
 #include "src/__support/macros/attributes.h"   // LIBC_INLINE
@@ -106,8 +107,6 @@ LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
                                  sizeof(OutType) <= sizeof(InType),
                              OutType>
 fma(InType x, InType y, InType z) {
-  using OutFPBits = fputil::FPBits<OutType>;
-  using OutStorageType = typename OutFPBits::StorageType;
   using InFPBits = fputil::FPBits<InType>;
   using InStorageType = typename InFPBits::StorageType;
 
@@ -115,11 +114,7 @@ fma(InType x, InType y, InType z) {
   constexpr size_t PROD_LEN = 2 * IN_EXPLICIT_MANT_LEN;
   constexpr size_t TMP_RESULT_LEN = cpp::bit_ceil(PROD_LEN + 1);
   using TmpResultType = UInt<TMP_RESULT_LEN>;
-
-  constexpr size_t EXTRA_FRACTION_LEN =
-      TMP_RESULT_LEN - 1 - OutFPBits::FRACTION_LEN;
-  constexpr TmpResultType EXTRA_FRACTION_STICKY_MASK =
-      (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1)) - 1;
+  using DyadicFloat = DyadicFloat<TMP_RESULT_LEN>;
 
   if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0))
     return static_cast<OutType>(x * y + z);
@@ -182,7 +177,6 @@ fma(InType x, InType y, InType z) {
   constexpr int RESULT_MIN_LEN = PROD_LEN - InFPBits::FRACTION_LEN;
   z_mant <<= RESULT_MIN_LEN;
   int z_lsb_exp = z_exp - (InFPBits::FRACTION_LEN + RESULT_MIN_LEN);
-  bool round_bit = false;
   bool sticky_bits = false;
   bool z_shifted = false;
 
@@ -221,85 +215,18 @@ fma(InType x, InType y, InType z) {
     }
   }
 
-  OutStorageType result = 0;
-  int r_exp = 0; // Unbiased exponent of the result
-
-  int round_mode = fputil::quick_get_round();
-
-  // Normalize the result.
-  if (prod_mant != 0) {
-    int lead_zeros = cpp::countl_zero(prod_mant);
-    // Move the leading 1 to the most significant bit.
-    prod_mant <<= lead_zeros;
-    prod_lsb_exp -= lead_zeros;
-    r_exp = prod_lsb_exp + (cpp::numeric_limits<TmpResultType>::digits - 1) -
-            InFPBits::EXP_BIAS + OutFPBits::EXP_BIAS;
-
-    if (r_exp > 0) {
-      // The result is normal.  We will shift the mantissa to the right by the
-      // amount of extra bits compared to the length of the explicit mantissa in
-      // the output type.  The rounding bit then becomes the highest bit that is
-      // shifted out, and the following lower bits are merged into sticky bits.
-      round_bit =
-          (prod_mant & (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1))) != 0;
-      sticky_bits |= (prod_mant & EXTRA_FRACTION_STICKY_MASK) != 0;
-      result = static_cast<OutStorageType>(prod_mant >> EXTRA_FRACTION_LEN);
-    } else {
-      if (r_exp < -OutFPBits::FRACTION_LEN) {
-        // The result is smaller than 1/2 of the smallest denormal number.
-        sticky_bits = true; // since the result is non-zero.
-        result = 0;
-      } else {
-        // The result is denormal.
-        TmpResultType mask = TmpResultType(1) << (EXTRA_FRACTION_LEN - r_exp);
-        round_bit = (prod_mant & mask) != 0;
-        sticky_bits |= (prod_mant & (mask - 1)) != 0;
-        if (r_exp > -OutFPBits::FRACTION_LEN)
-          result = static_cast<OutStorageType>(
-              prod_mant >> (EXTRA_FRACTION_LEN + 1 - r_exp));
-        else
-          result = 0;
-      }
-
-      r_exp = 0;
-    }
-  } else {
+  if (prod_mant == 0) {
     // When there is exact cancellation, i.e., x*y == -z exactly, return -0.0 if
     // rounding downward and +0.0 for other rounding modes.
-    if (round_mode == FE_DOWNWARD)
+    if (quick_get_round() == FE_DOWNWARD)
       prod_sign = Sign::NEG;
     else
       prod_sign = Sign::POS;
   }
 
-  // Finalize the result.
-  if (LIBC_UNLIKELY(r_exp >= OutFPBits::MAX_BIASED_EXPONENT)) {
-    if ((round_mode == FE_TOWARDZERO) ||
-        (round_mode == FE_UPWARD && prod_sign.is_neg()) ||
-        (round_mode == FE_DOWNWARD && prod_sign.is_pos())) {
-      return OutFPBits::max_normal(prod_sign).get_val();
-    }
-    return OutFPBits::inf(prod_sign).get_val();
-  }
-
-  // Remove hidden bit and append the exponent field and sign bit.
-  result = static_cast<OutStorageType>(
-      (result & OutFPBits::FRACTION_MASK) |
-      (static_cast<OutStorageType>(r_exp) << OutFPBits::FRACTION_LEN));
-  if (prod_sign.is_neg())
-    result |= OutFPBits::SIGN_MASK;
-
-  // Rounding.
-  if (round_mode == FE_TONEAREST) {
-    if (round_bit && (sticky_bits || ((result & 1) != 0)))
-      ++result;
-  } else if ((round_mode == FE_UPWARD && prod_sign.is_pos()) ||
-             (round_mode == FE_DOWNWARD && prod_sign.is_neg())) {
-    if (round_bit || sticky_bits)
-      ++result;
-  }
-
-  return cpp::bit_cast<OutType>(result);
+  DyadicFloat result(prod_sign, prod_lsb_exp - InFPBits::EXP_BIAS, prod_mant);
+  result.mantissa |= sticky_bits;
+  return result.template as<OutType, /*ShouldSignalExceptions=*/true>();
 }
 
 } // namespace generic
diff --git a/libc/src/__support/FPUtil/generic_hardware/CMakeLists.txt b/libc/src/__support/FPUtil/generic_hardware/CMakeLists.txt
new file mode 100644
index 0000000000000..a094d7f8a6f00
--- /dev/null
+++ b/libc/src/__support/FPUtil/generic_hardware/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_header_library(
+  fma
+  HDRS
+    fma.h
+  DEPENDS
+    libc.src.__support.common
+    libc.src.__support.macros.properties.cpu_features
+  FLAGS
+    FMA_OPT
+)
diff --git a/libc/src/__support/FPUtil/generic_hardware/fma.h b/libc/src/__support/FPUtil/generic_hardware/fma.h
new file mode 100644
index 0000000000000..f878728cd2de5
--- /dev/null
+++ b/libc/src/__support/FPUtil/generic_hardware/fma.h
@@ -0,0 +1,29 @@
+//===-- Generic hardware implementation of fused multiply-add ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBC_SRC___SUPPORT_FPUTIL_GENERIC_HARDWARE_FMA_H
+#define LIBC_SRC___SUPPORT_FPUTIL_GENERIC_HARDWARE_FMA_H
+
+#include "src/__support/common.h"
+#include "src/__support/macros/properties/cpu_features.h"
+
+namespace LIBC_NAMESPACE::fputil::generic_hardware {
+
+#ifdef LIBC_TARGET_CPU_HAS_FMA
+LIBC_INLINE float fma(float x, float y, float z) {
+  return __builtin_fmaf(x, y, z);
+}
+
+LIBC_INLINE double fma(double x, double y, double z) {
+  return __builtin_fma(x, y, z);
+}
+#endif // LIBC_TARGET_CPU_HAS_FMA
+
+} // namespace LIBC_NAMESPACE::fputil::generic_hardware
+
+#endif // LIBC_SRC___SUPPORT_FPUTIL_GENERIC_HARDWARE_FMA_H
diff --git a/libc/src/__support/FPUtil/multiply_add.h b/libc/src/__support/FPUtil/multiply_add.h
index 622914e4265c9..9683c526aee72 100644
--- a/libc/src/__support/FPUtil/multiply_add.h
+++ b/libc/src/__support/FPUtil/multiply_add.h
@@ -39,17 +39,17 @@ multiply_add(T x, T y, T z) {
 #if defined(LIBC_TARGET_CPU_HAS_FMA)
 
 // FMA instructions are available.
-#include "FMA.h"
+#include "src/__support/FPUtil/generic_hardware/fma.h"
 
 namespace LIBC_NAMESPACE {
 namespace fputil {
 
 LIBC_INLINE float multiply_add(float x, float y, float z) {
-  return fma<float>(x, y, z);
+  return generic_hardware::fma(x, y, z);
 }
 
 LIBC_INLINE double multiply_add(double x, double y, double z) {
-  return fma<double>(x, y, z);
+  return generic_hardware::fma(x, y, z);
 }
 
 } // namespace fputil
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 711cbf8bbfdca..e0a59b33b8fc7 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -101,6 +101,7 @@ add_math_entrypoint_object(expm1f)
 
 add_math_entrypoint_object(f16divf)
 
+add_math_entrypoint_object(f16fma)
 add_math_entrypoint_object(f16fmaf)
 
 add_math_entrypoint_object(f16sqrtf)
diff --git a/libc/src/math/f16fma.h b/libc/src/math/f16fma.h
new file mode 100644
index 0000000000000..d9505f88f37af
--- /dev/null
+++ b/libc/src/math/f16fma.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for f16fma ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_F16FMA_H
+#define LLVM_LIBC_SRC_MATH_F16FMA_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 f16fma(double x, double y, double z);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_F16FMA_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index fc2024c89b5df..29a3cc79dd239 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3744,6 +3744,19 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  f16fma
+  SRCS
+    f16fma.cpp
+  HDRS
+    ../f16fma.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.fma
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   f16fmaf
   SRCS
diff --git a/libc/src/math/generic/f16fma.cpp b/libc/src/math/generic/f16fma.cpp
new file mode 100644
index 0000000000000..10ee028c06930
--- /dev/null
+++ b/libc/src/math/generic/f16fma.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of f16fma function ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/f16fma.h"
+#include "src/__support/FPUtil/FMA.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, f16fma, (double x, double y, double z)) {
+  return fputil::fma<float16>(x, y, z);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index ba588662f469e..ab3b155f0f92b 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1903,6 +1903,21 @@ add_fp_unittest(
     libc.src.math.f16divf
 )
 
+add_fp_unittest(
+  f16fma_test
+  NEED_MPFR
+  SUITE
+    libc-math-unittests
+  SRCS
+    f16fma_test.cpp
+  HDRS
+    FmaTest.h
+  DEPENDS
+    libc.src.math.f16fma
+    libc.src.stdlib.rand
+    libc.src.stdlib.srand
+)
+
 add_fp_unittest(
   f16fmaf_test
   NEED_MPFR
diff --git a/libc/test/src/math/f16fma_test.cpp b/libc/test/src/math/f16fma_test.cpp
new file mode 100644
index 0000000000000..d684c4f304fbc
--- /dev/null
+++ b/libc/test/src/math/f16fma_test.cpp
@@ -0,0 +1,21 @@
+//===-- Unittests for f16fma ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FmaTest.h"
+
+#include "src/math/f16fma.h"
+
+using LlvmLibcF16fmaTest = FmaTestTemplate<float16, double>;
+
+TEST_F(LlvmLibcF16fmaTest, SubnormalRange) {
+  test_subnormal_range(&LIBC_NAMESPACE::f16fma);
+}
+
+TEST_F(LlvmLibcF16fmaTest, NormalRange) {
+  test_normal_range(&LIBC_NAMESPACE::f16fma);
+}
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index ee99fb96a52ce..21e52a917349c 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -3644,6 +3644,18 @@ add_fp_unittest(
     libc.src.math.f16divf
 )
 
+add_fp_unittest(
+  f16fma_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    f16fma_test.cpp
+  HDRS
+    FmaTest.h
+  DEPENDS
+    libc.src.math.f16fma
+)
+
 add_fp_unittest(
   f16fmaf_test
   SUITE
diff --git a/libc/test/src/math/smoke/f16fma_test.cpp b/libc/test/src/math/smoke/f16fma_test.cpp
new file mode 100644
index 0000000000000..2e46b5bdd4682
--- /dev/null
+++ b/libc/test/src/math/smoke/f16fma_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for f16fma ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FmaTest.h"
+
+#include "src/math/f16fma.h"
+
+LIST_NARROWING_FMA_TESTS(float16, double, LIBC_NAMESPACE::f16fma)
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index 521c2658b327a..d1c814b6bf18f 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -977,6 +977,8 @@ explain_ternary_operation_one_output_error(Operation,
 #ifdef LIBC_TYPES_HAS_FLOAT16
 template void explain_ternary_operation_one_output_error(
     Operation, const TernaryInput<float> &, float16, double, RoundingMode);
+template void explain_ternary_operation_one_output_error(
+    Operation, const TernaryInput<double> &, float16, double, RoundingMode);
 #endif
 
 template <typename InputType, typename OutputType>
@@ -1124,6 +1126,10 @@ template bool compare_ternary_operation_one_output(Operation,
                                                    const TernaryInput<float> &,
                                                    float16, double,
                                                    RoundingMode);
+template bool compare_ternary_operation_one_output(Operation,
+                                                   const TernaryInput<double> &,
+                                                   float16, double,
+                                                   RoundingMode);
 #endif
 
 } // namespace internal

>From 83bb31e2fd1adf763a5af2c9c814a5145577a16f Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Wed, 26 Jun 2024 14:25:47 +0200
Subject: [PATCH 2/8] fixup! [libc][math][c23] Add f16fma C23 math function

Update FPUtil/CMakeLists.txt to fix circular dependency.
---
 libc/src/__support/FPUtil/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
index 0f27b79b059a3..7b52872f5f5c3 100644
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -154,7 +154,7 @@ add_header_library(
   HDRS
     multiply_add.h
   DEPENDS
-    .fma
+    libc.src.__support.FPUtil.generic_hardware.fma
     libc.src.__support.common
 )
 

>From b394739aa2b0b781350cad98120fccac9f554f4f Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Thu, 27 Jun 2024 14:29:40 +0200
Subject: [PATCH 3/8] fixup! fixup! [libc][math][c23] Add f16fma C23 math
 function

Fix conversion to output type.
---
 libc/src/__support/FPUtil/dyadic_float.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
index 1e1bec676d444..fb1b22467f940 100644
--- a/libc/src/__support/FPUtil/dyadic_float.h
+++ b/libc/src/__support/FPUtil/dyadic_float.h
@@ -156,13 +156,13 @@ template <size_t Bits> struct DyadicFloat {
       // d_lo is denormal, but the output is normal.
       int scale_up_exponent = 1 - exp_lo;
       T scale_up_factor =
-          FPBits<T>::create_value(sign,
+          FPBits<T>::create_value(Sign::POS,
                                   static_cast<output_bits_t>(
                                       FPBits<T>::EXP_BIAS + scale_up_exponent),
                                   IMPLICIT_MASK)
               .get_val();
       T scale_down_factor =
-          FPBits<T>::create_value(sign,
+          FPBits<T>::create_value(Sign::POS,
                                   static_cast<output_bits_t>(
                                       FPBits<T>::EXP_BIAS - scale_up_exponent),
                                   IMPLICIT_MASK)

>From 32cd1b07e9a1aed010639e4874a41daa77d65214 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Thu, 27 Jun 2024 14:49:54 +0200
Subject: [PATCH 4/8] [libc][math][c23] Add f16fmaf128 C23 math function

---
 libc/config/linux/aarch64/entrypoints.txt     |  7 +++++++
 libc/config/linux/x86_64/entrypoints.txt      |  7 +++++++
 libc/docs/math/index.rst                      |  2 +-
 .../include/llvm-libc-macros/float16-macros.h |  8 ++++++++
 libc/spec/stdc.td                             |  1 +
 libc/src/math/CMakeLists.txt                  |  1 +
 libc/src/math/f16fmaf128.h                    | 20 +++++++++++++++++++
 libc/src/math/generic/CMakeLists.txt          | 13 ++++++++++++
 libc/src/math/generic/f16fmaf128.cpp          | 19 ++++++++++++++++++
 libc/test/src/math/smoke/CMakeLists.txt       | 12 +++++++++++
 libc/test/src/math/smoke/f16fmaf128_test.cpp  | 13 ++++++++++++
 11 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 libc/src/math/f16fmaf128.h
 create mode 100644 libc/src/math/generic/f16fmaf128.cpp
 create mode 100644 libc/test/src/math/smoke/f16fmaf128_test.cpp

diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index f798bf282bf5d..587869580f39e 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -559,6 +559,13 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.ufromfpf16
     libc.src.math.ufromfpxf16
   )
+
+  if(LIBC_TYPES_HAS_FLOAT128)
+    list(APPEND TARGET_LIBM_ENTRYPOINTS
+      # math.h C23 mixed _Float16 and _Float128 entrypoints
+      libc.src.math.f16fmaf128
+    )
+  endif()
 endif()
 
 if(LIBC_TYPES_HAS_FLOAT128)
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 9d88cf2b60222..9c602b2a3704a 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -588,6 +588,13 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.ufromfpf16
     libc.src.math.ufromfpxf16
   )
+
+  if(LIBC_TYPES_HAS_FLOAT128)
+    list(APPEND TARGET_LIBM_ENTRYPOINTS
+      # math.h C23 mixed _Float16 and _Float128 entrypoints
+      libc.src.math.f16fmaf128
+    )
+  endif()
 endif()
 
 if(LIBC_TYPES_HAS_FLOAT128)
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index 30079e8410f19..fdcbf33d7df56 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -126,7 +126,7 @@ Basic Operations
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | f16div           | |check|          |                 |                        | N/A                  |                        | 7.12.14.4              | F.10.11                    |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| f16fma           | |check|          | |check|         |                        | N/A                  |                        | 7.12.14.5              | F.10.11                    |
+| f16fma           | |check|          | |check|         |                        | N/A                  | |check|                | 7.12.14.5              | F.10.11                    |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | fabs             | |check|          | |check|         | |check|                | |check|              | |check|                | 7.12.7.3               | F.10.4.3                   |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/include/llvm-libc-macros/float16-macros.h b/libc/include/llvm-libc-macros/float16-macros.h
index e7d8d93bca1b6..d84619d5cc5c2 100644
--- a/libc/include/llvm-libc-macros/float16-macros.h
+++ b/libc/include/llvm-libc-macros/float16-macros.h
@@ -9,10 +9,18 @@
 #ifndef LLVM_LIBC_MACROS_FLOAT16_MACROS_H
 #define LLVM_LIBC_MACROS_FLOAT16_MACROS_H
 
+#include "llvm-libc-types/float128.h"
+
 #if defined(__FLT16_MANT_DIG__) &&                                             \
     (!defined(__GNUC__) || __GNUC__ >= 13 || defined(__clang__)) &&            \
     !defined(__arm__) && !defined(_M_ARM) && !defined(__riscv)
 #define LIBC_TYPES_HAS_FLOAT16
+
+// TODO: This would no longer be required if HdrGen let us guard function
+// declarations with multiple macros.
+#ifdef LIBC_TYPES_HAS_FLOAT128
+#define LIBC_TYPES_HAS_FLOAT16_AND_FLOAT128
+#endif // LIBC_TYPES_HAS_FLOAT128
 #endif
 
 #endif // LLVM_LIBC_MACROS_FLOAT16_MACROS_H
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 367e6b2887dbe..0e347a91057d8 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -479,6 +479,7 @@ def StdC : StandardSpec<"stdc"> {
 
           GuardedFunctionSpec<"f16fma", RetValSpec<Float16Type>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>, ArgSpec<DoubleType>], "LIBC_TYPES_HAS_FLOAT16">,
           GuardedFunctionSpec<"f16fmaf", RetValSpec<Float16Type>, [ArgSpec<FloatType>, ArgSpec<FloatType>, ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
+          GuardedFunctionSpec<"f16fmaf128", RetValSpec<Float16Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT16_AND_FLOAT128">,
 
           FunctionSpec<"fmod", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
           FunctionSpec<"fmodf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index e0a59b33b8fc7..9ed843aaa5d93 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -103,6 +103,7 @@ add_math_entrypoint_object(f16divf)
 
 add_math_entrypoint_object(f16fma)
 add_math_entrypoint_object(f16fmaf)
+add_math_entrypoint_object(f16fmaf128)
 
 add_math_entrypoint_object(f16sqrtf)
 
diff --git a/libc/src/math/f16fmaf128.h b/libc/src/math/f16fmaf128.h
new file mode 100644
index 0000000000000..9203b4d30d212
--- /dev/null
+++ b/libc/src/math/f16fmaf128.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for f16fmaf128 --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_F16FMAF128_H
+#define LLVM_LIBC_SRC_MATH_F16FMAF128_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 f16fmaf128(float128 x, float128 y, float128 z);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_F16FMAF128_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 29a3cc79dd239..9dfee2d0f90a9 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3770,6 +3770,19 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  f16fmaf128
+  SRCS
+    f16fmaf128.cpp
+  HDRS
+    ../f16fmaf128.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.fma
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   f16sqrtf
   SRCS
diff --git a/libc/src/math/generic/f16fmaf128.cpp b/libc/src/math/generic/f16fmaf128.cpp
new file mode 100644
index 0000000000000..5b2f801cf21fa
--- /dev/null
+++ b/libc/src/math/generic/f16fmaf128.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of f16fmaf128 function -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/f16fmaf128.h"
+#include "src/__support/FPUtil/FMA.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, f16fmaf128, (float128 x, float128 y, float128 z)) {
+  return fputil::fma<float16>(x, y, z);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 21e52a917349c..7ccff3467ead2 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -3668,6 +3668,18 @@ add_fp_unittest(
     libc.src.math.f16fmaf
 )
 
+add_fp_unittest(
+  f16fmaf128_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    f16fmaf128_test.cpp
+  HDRS
+    FmaTest.h
+  DEPENDS
+    libc.src.math.f16fmaf128
+)
+
 add_fp_unittest(
   f16sqrtf_test
   SUITE
diff --git a/libc/test/src/math/smoke/f16fmaf128_test.cpp b/libc/test/src/math/smoke/f16fmaf128_test.cpp
new file mode 100644
index 0000000000000..ea4003bc5f77e
--- /dev/null
+++ b/libc/test/src/math/smoke/f16fmaf128_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for f16fmaf128 ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FmaTest.h"
+
+#include "src/math/f16fmaf128.h"
+
+LIST_NARROWING_FMA_TESTS(float16, float128, LIBC_NAMESPACE::f16fmaf128)

>From c85a97394524e9df19512f817d0c528296cba340 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Thu, 27 Jun 2024 14:59:02 +0200
Subject: [PATCH 5/8] [libc][math][c23] Add f16fmal C23 math function

---
 libc/config/linux/aarch64/entrypoints.txt     |  1 +
 libc/config/linux/x86_64/entrypoints.txt      |  1 +
 libc/docs/math/index.rst                      |  2 +-
 .../include/llvm-libc-macros/float16-macros.h |  2 +-
 libc/spec/stdc.td                             |  1 +
 libc/src/math/CMakeLists.txt                  |  1 +
 libc/src/math/f16fmal.h                       | 20 +++++++++++++++++++
 libc/src/math/generic/CMakeLists.txt          | 13 ++++++++++++
 libc/src/math/generic/f16fmal.cpp             | 20 +++++++++++++++++++
 libc/test/src/math/CMakeLists.txt             | 15 ++++++++++++++
 libc/test/src/math/FmaTest.h                  | 13 +++++++++---
 libc/test/src/math/f16fma_test.cpp            | 10 +---------
 libc/test/src/math/f16fmaf_test.cpp           | 10 +---------
 libc/test/src/math/f16fmal_test.cpp           | 13 ++++++++++++
 libc/test/src/math/fmaf_test.cpp              | 10 +---------
 libc/test/src/math/smoke/CMakeLists.txt       | 12 +++++++++++
 libc/test/src/math/smoke/f16fmal_test.cpp     | 13 ++++++++++++
 17 files changed, 125 insertions(+), 32 deletions(-)
 create mode 100644 libc/src/math/f16fmal.h
 create mode 100644 libc/src/math/generic/f16fmal.cpp
 create mode 100644 libc/test/src/math/f16fmal_test.cpp
 create mode 100644 libc/test/src/math/smoke/f16fmal_test.cpp

diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 587869580f39e..3a53b8ad45ae1 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -508,6 +508,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.f16divf
     libc.src.math.f16fma
     libc.src.math.f16fmaf
+    libc.src.math.f16fmal
     libc.src.math.f16sqrtf
     libc.src.math.fabsf16
     libc.src.math.fdimf16
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 9c602b2a3704a..defda2dd15a31 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -539,6 +539,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.f16divf
     libc.src.math.f16fma
     libc.src.math.f16fmaf
+    libc.src.math.f16fmal
     libc.src.math.f16sqrtf
     libc.src.math.fabsf16
     libc.src.math.fdimf16
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index fdcbf33d7df56..b19ec3b8d9085 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -126,7 +126,7 @@ Basic Operations
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | f16div           | |check|          |                 |                        | N/A                  |                        | 7.12.14.4              | F.10.11                    |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| f16fma           | |check|          | |check|         |                        | N/A                  | |check|                | 7.12.14.5              | F.10.11                    |
+| f16fma           | |check|          | |check|         | |check|                | N/A                  | |check|                | 7.12.14.5              | F.10.11                    |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | fabs             | |check|          | |check|         | |check|                | |check|              | |check|                | 7.12.7.3               | F.10.4.3                   |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/include/llvm-libc-macros/float16-macros.h b/libc/include/llvm-libc-macros/float16-macros.h
index d84619d5cc5c2..9a11ecc49307e 100644
--- a/libc/include/llvm-libc-macros/float16-macros.h
+++ b/libc/include/llvm-libc-macros/float16-macros.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_LIBC_MACROS_FLOAT16_MACROS_H
 #define LLVM_LIBC_MACROS_FLOAT16_MACROS_H
 
-#include "llvm-libc-types/float128.h"
+#include "../llvm-libc-types/float128.h"
 
 #if defined(__FLT16_MANT_DIG__) &&                                             \
     (!defined(__GNUC__) || __GNUC__ >= 13 || defined(__clang__)) &&            \
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 0e347a91057d8..adac7d5932428 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -479,6 +479,7 @@ def StdC : StandardSpec<"stdc"> {
 
           GuardedFunctionSpec<"f16fma", RetValSpec<Float16Type>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>, ArgSpec<DoubleType>], "LIBC_TYPES_HAS_FLOAT16">,
           GuardedFunctionSpec<"f16fmaf", RetValSpec<Float16Type>, [ArgSpec<FloatType>, ArgSpec<FloatType>, ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
+          GuardedFunctionSpec<"f16fmal", RetValSpec<Float16Type>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>], "LIBC_TYPES_HAS_FLOAT16">,
           GuardedFunctionSpec<"f16fmaf128", RetValSpec<Float16Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT16_AND_FLOAT128">,
 
           FunctionSpec<"fmod", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 9ed843aaa5d93..3dfc4ac94827d 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -103,6 +103,7 @@ add_math_entrypoint_object(f16divf)
 
 add_math_entrypoint_object(f16fma)
 add_math_entrypoint_object(f16fmaf)
+add_math_entrypoint_object(f16fmal)
 add_math_entrypoint_object(f16fmaf128)
 
 add_math_entrypoint_object(f16sqrtf)
diff --git a/libc/src/math/f16fmal.h b/libc/src/math/f16fmal.h
new file mode 100644
index 0000000000000..6f5dd33aa18f6
--- /dev/null
+++ b/libc/src/math/f16fmal.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for f16fmal -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_F16FMAL_H
+#define LLVM_LIBC_SRC_MATH_F16FMAL_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 f16fmal(long double x, long double y, long double z);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_F16FMAL_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 9dfee2d0f90a9..d7c9817fc457b 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3770,6 +3770,19 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  f16fmal
+  SRCS
+    f16fmal.cpp
+  HDRS
+    ../f16fmal.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.fma
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   f16fmaf128
   SRCS
diff --git a/libc/src/math/generic/f16fmal.cpp b/libc/src/math/generic/f16fmal.cpp
new file mode 100644
index 0000000000000..067483629a336
--- /dev/null
+++ b/libc/src/math/generic/f16fmal.cpp
@@ -0,0 +1,20 @@
+//===-- Implementation of f16fmal function --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/f16fmal.h"
+#include "src/__support/FPUtil/FMA.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, f16fmal,
+                   (long double x, long double y, long double z)) {
+  return fputil::fma<float16>(x, y, z);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index ab3b155f0f92b..62ffadd12f03b 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1933,6 +1933,21 @@ add_fp_unittest(
     libc.src.stdlib.srand
 )
 
+add_fp_unittest(
+  f16fmal_test
+  NEED_MPFR
+  SUITE
+    libc-math-unittests
+  SRCS
+    f16fmal_test.cpp
+  HDRS
+    FmaTest.h
+  DEPENDS
+    libc.src.math.f16fmal
+    libc.src.stdlib.rand
+    libc.src.stdlib.srand
+)
+
 add_subdirectory(generic)
 add_subdirectory(smoke)
 
diff --git a/libc/test/src/math/FmaTest.h b/libc/test/src/math/FmaTest.h
index 53895e7d633c2..01143331d4ab5 100644
--- a/libc/test/src/math/FmaTest.h
+++ b/libc/test/src/math/FmaTest.h
@@ -45,9 +45,6 @@ class FmaTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest {
   static constexpr InStorageType IN_MIN_SUBNORMAL_U =
       InFPBits::min_subnormal().uintval();
 
-  OutConstants out;
-  InConstants in;
-
   InStorageType get_random_bit_pattern() {
     InStorageType bits{0};
     for (InStorageType i = 0; i < sizeof(InStorageType) / 2; ++i) {
@@ -92,4 +89,14 @@ class FmaTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest {
   }
 };
 
+#define LIST_FMA_TESTS(T, func)                                                \
+  using LlvmLibcFmaTest = FmaTestTemplate<T>;                                  \
+  TEST_F(LlvmLibcFmaTest, SubnormalRange) { test_subnormal_range(&func); }     \
+  TEST_F(LlvmLibcFmaTest, NormalRange) { test_normal_range(&func); }
+
+#define LIST_NARROWING_FMA_TESTS(OutType, InType, func)                        \
+  using LlvmLibcFmaTest = FmaTestTemplate<OutType, InType>;                    \
+  TEST_F(LlvmLibcFmaTest, SubnormalRange) { test_subnormal_range(&func); }     \
+  TEST_F(LlvmLibcFmaTest, NormalRange) { test_normal_range(&func); }
+
 #endif // LLVM_LIBC_TEST_SRC_MATH_FMATEST_H
diff --git a/libc/test/src/math/f16fma_test.cpp b/libc/test/src/math/f16fma_test.cpp
index d684c4f304fbc..2e46b5bdd4682 100644
--- a/libc/test/src/math/f16fma_test.cpp
+++ b/libc/test/src/math/f16fma_test.cpp
@@ -10,12 +10,4 @@
 
 #include "src/math/f16fma.h"
 
-using LlvmLibcF16fmaTest = FmaTestTemplate<float16, double>;
-
-TEST_F(LlvmLibcF16fmaTest, SubnormalRange) {
-  test_subnormal_range(&LIBC_NAMESPACE::f16fma);
-}
-
-TEST_F(LlvmLibcF16fmaTest, NormalRange) {
-  test_normal_range(&LIBC_NAMESPACE::f16fma);
-}
+LIST_NARROWING_FMA_TESTS(float16, double, LIBC_NAMESPACE::f16fma)
diff --git a/libc/test/src/math/f16fmaf_test.cpp b/libc/test/src/math/f16fmaf_test.cpp
index e4ca88b8810e1..5e3aec768c191 100644
--- a/libc/test/src/math/f16fmaf_test.cpp
+++ b/libc/test/src/math/f16fmaf_test.cpp
@@ -10,12 +10,4 @@
 
 #include "src/math/f16fmaf.h"
 
-using LlvmLibcF16fmafTest = FmaTestTemplate<float16, float>;
-
-TEST_F(LlvmLibcF16fmafTest, SubnormalRange) {
-  test_subnormal_range(&LIBC_NAMESPACE::f16fmaf);
-}
-
-TEST_F(LlvmLibcF16fmafTest, NormalRange) {
-  test_normal_range(&LIBC_NAMESPACE::f16fmaf);
-}
+LIST_NARROWING_FMA_TESTS(float16, float, LIBC_NAMESPACE::f16fmaf)
diff --git a/libc/test/src/math/f16fmal_test.cpp b/libc/test/src/math/f16fmal_test.cpp
new file mode 100644
index 0000000000000..5394268a9cd8f
--- /dev/null
+++ b/libc/test/src/math/f16fmal_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for f16fmal ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FmaTest.h"
+
+#include "src/math/f16fmal.h"
+
+LIST_NARROWING_FMA_TESTS(float16, long double, LIBC_NAMESPACE::f16fmal)
diff --git a/libc/test/src/math/fmaf_test.cpp b/libc/test/src/math/fmaf_test.cpp
index 0e498d46ecfb0..09e9c504b942a 100644
--- a/libc/test/src/math/fmaf_test.cpp
+++ b/libc/test/src/math/fmaf_test.cpp
@@ -10,12 +10,4 @@
 
 #include "src/math/fmaf.h"
 
-using LlvmLibcFmafTest = FmaTestTemplate<float>;
-
-TEST_F(LlvmLibcFmafTest, SubnormalRange) {
-  test_subnormal_range(&LIBC_NAMESPACE::fmaf);
-}
-
-TEST_F(LlvmLibcFmafTest, NormalRange) {
-  test_normal_range(&LIBC_NAMESPACE::fmaf);
-}
+LIST_FMA_TESTS(float, LIBC_NAMESPACE::fmaf)
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 7ccff3467ead2..2362ee12bcf41 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -3668,6 +3668,18 @@ add_fp_unittest(
     libc.src.math.f16fmaf
 )
 
+add_fp_unittest(
+  f16fmal_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    f16fmal_test.cpp
+  HDRS
+    FmaTest.h
+  DEPENDS
+    libc.src.math.f16fmal
+)
+
 add_fp_unittest(
   f16fmaf128_test
   SUITE
diff --git a/libc/test/src/math/smoke/f16fmal_test.cpp b/libc/test/src/math/smoke/f16fmal_test.cpp
new file mode 100644
index 0000000000000..5394268a9cd8f
--- /dev/null
+++ b/libc/test/src/math/smoke/f16fmal_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for f16fmal ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FmaTest.h"
+
+#include "src/math/f16fmal.h"
+
+LIST_NARROWING_FMA_TESTS(float16, long double, LIBC_NAMESPACE::f16fmal)

>From 5892e2ba49d7c6f24c55a991820a6b9772ad9ce4 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Thu, 27 Jun 2024 16:29:15 +0200
Subject: [PATCH 6/8] fixup! [libc][math][c23] Add f16fmaf128 C23 math function

---
 libc/config/linux/aarch64/entrypoints.txt | 2 +-
 libc/config/linux/x86_64/entrypoints.txt  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 3a53b8ad45ae1..086bd4e26cbf8 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -564,7 +564,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
   if(LIBC_TYPES_HAS_FLOAT128)
     list(APPEND TARGET_LIBM_ENTRYPOINTS
       # math.h C23 mixed _Float16 and _Float128 entrypoints
-      libc.src.math.f16fma128
+      libc.src.math.f16fmaf128
     )
   endif()
 endif()
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index defda2dd15a31..8125dcb7a5633 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -593,7 +593,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
   if(LIBC_TYPES_HAS_FLOAT128)
     list(APPEND TARGET_LIBM_ENTRYPOINTS
       # math.h C23 mixed _Float16 and _Float128 entrypoints
-      libc.src.math.f16fma128
+      libc.src.math.f16fmaf128
     )
   endif()
 endif()

>From 427873a7cb547fe1f8b9e328fc83ba1b6824704f Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Thu, 27 Jun 2024 17:12:49 +0200
Subject: [PATCH 7/8] fixup! [libc][math][c23] Add f16fma C23 math function

Change multiply_add.h to use FMA builtins directly.
---
 libc/src/__support/FPUtil/CMakeLists.txt      |  2 --
 .../FPUtil/generic_hardware/CMakeLists.txt    | 10 -------
 .../__support/FPUtil/generic_hardware/fma.h   | 29 -------------------
 libc/src/__support/FPUtil/multiply_add.h      |  7 +++--
 4 files changed, 4 insertions(+), 44 deletions(-)
 delete mode 100644 libc/src/__support/FPUtil/generic_hardware/CMakeLists.txt
 delete mode 100644 libc/src/__support/FPUtil/generic_hardware/fma.h

diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
index 7b52872f5f5c3..84c5f802710c4 100644
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -154,7 +154,6 @@ add_header_library(
   HDRS
     multiply_add.h
   DEPENDS
-    libc.src.__support.FPUtil.generic_hardware.fma
     libc.src.__support.common
 )
 
@@ -227,4 +226,3 @@ add_header_library(
 )
 
 add_subdirectory(generic)
-add_subdirectory(generic_hardware)
diff --git a/libc/src/__support/FPUtil/generic_hardware/CMakeLists.txt b/libc/src/__support/FPUtil/generic_hardware/CMakeLists.txt
deleted file mode 100644
index a094d7f8a6f00..0000000000000
--- a/libc/src/__support/FPUtil/generic_hardware/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-add_header_library(
-  fma
-  HDRS
-    fma.h
-  DEPENDS
-    libc.src.__support.common
-    libc.src.__support.macros.properties.cpu_features
-  FLAGS
-    FMA_OPT
-)
diff --git a/libc/src/__support/FPUtil/generic_hardware/fma.h b/libc/src/__support/FPUtil/generic_hardware/fma.h
deleted file mode 100644
index f878728cd2de5..0000000000000
--- a/libc/src/__support/FPUtil/generic_hardware/fma.h
+++ /dev/null
@@ -1,29 +0,0 @@
-//===-- Generic hardware implementation of fused multiply-add ---*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LIBC_SRC___SUPPORT_FPUTIL_GENERIC_HARDWARE_FMA_H
-#define LIBC_SRC___SUPPORT_FPUTIL_GENERIC_HARDWARE_FMA_H
-
-#include "src/__support/common.h"
-#include "src/__support/macros/properties/cpu_features.h"
-
-namespace LIBC_NAMESPACE::fputil::generic_hardware {
-
-#ifdef LIBC_TARGET_CPU_HAS_FMA
-LIBC_INLINE float fma(float x, float y, float z) {
-  return __builtin_fmaf(x, y, z);
-}
-
-LIBC_INLINE double fma(double x, double y, double z) {
-  return __builtin_fma(x, y, z);
-}
-#endif // LIBC_TARGET_CPU_HAS_FMA
-
-} // namespace LIBC_NAMESPACE::fputil::generic_hardware
-
-#endif // LIBC_SRC___SUPPORT_FPUTIL_GENERIC_HARDWARE_FMA_H
diff --git a/libc/src/__support/FPUtil/multiply_add.h b/libc/src/__support/FPUtil/multiply_add.h
index 9683c526aee72..1b9eee4ace62d 100644
--- a/libc/src/__support/FPUtil/multiply_add.h
+++ b/libc/src/__support/FPUtil/multiply_add.h
@@ -39,17 +39,18 @@ multiply_add(T x, T y, T z) {
 #if defined(LIBC_TARGET_CPU_HAS_FMA)
 
 // FMA instructions are available.
-#include "src/__support/FPUtil/generic_hardware/fma.h"
+// We use builtins directly instead of including FMA.h to avoid a circular
+// dependency: multiply_add.h -> FMA.h -> generic/FMA.h -> dyadic_float.h.
 
 namespace LIBC_NAMESPACE {
 namespace fputil {
 
 LIBC_INLINE float multiply_add(float x, float y, float z) {
-  return generic_hardware::fma(x, y, z);
+  return __builtin_fmaf(x, y, z);
 }
 
 LIBC_INLINE double multiply_add(double x, double y, double z) {
-  return generic_hardware::fma(x, y, z);
+  return __builtin_fma(x, y, z);
 }
 
 } // namespace fputil

>From f0170f160f459b5b46f05fb738ce41981a17f851 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Thu, 27 Jun 2024 18:21:07 +0200
Subject: [PATCH 8/8] fixup! [libc][math][c23] Add f16fmal C23 math function

---
 .../__support/FPUtil/generic/CMakeLists.txt   |  1 +
 libc/src/__support/FPUtil/generic/FMA.h       | 45 ++++++++++++++++++-
 libc/utils/MPFRWrapper/MPFRUtils.cpp          |  8 ++++
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/libc/src/__support/FPUtil/generic/CMakeLists.txt b/libc/src/__support/FPUtil/generic/CMakeLists.txt
index 80af697903286..bd8af98473edf 100644
--- a/libc/src/__support/FPUtil/generic/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/generic/CMakeLists.txt
@@ -24,6 +24,7 @@ add_header_library(
     libc.src.__support.CPP.bit
     libc.src.__support.CPP.limits
     libc.src.__support.CPP.type_traits
+    libc.src.__support.FPUtil.basic_operations
     libc.src.__support.FPUtil.dyadic_float
     libc.src.__support.FPUtil.fenv_impl
     libc.src.__support.FPUtil.fp_bits
diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h
index 40a99fc6ca62e..d0a01c3092c42 100644
--- a/libc/src/__support/FPUtil/generic/FMA.h
+++ b/libc/src/__support/FPUtil/generic/FMA.h
@@ -12,6 +12,7 @@
 #include "src/__support/CPP/bit.h"
 #include "src/__support/CPP/limits.h"
 #include "src/__support/CPP/type_traits.h"
+#include "src/__support/FPUtil/BasicOperations.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/dyadic_float.h"
 #include "src/__support/FPUtil/rounding_mode.h"
@@ -107,7 +108,9 @@ LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
                                  sizeof(OutType) <= sizeof(InType),
                              OutType>
 fma(InType x, InType y, InType z) {
-  using InFPBits = fputil::FPBits<InType>;
+  using OutFPBits = FPBits<OutType>;
+  using OutStorageType = typename OutFPBits::StorageType;
+  using InFPBits = FPBits<InType>;
   using InStorageType = typename InFPBits::StorageType;
 
   constexpr int IN_EXPLICIT_MANT_LEN = InFPBits::FRACTION_LEN + 1;
@@ -116,6 +119,42 @@ fma(InType x, InType y, InType z) {
   using TmpResultType = UInt<TMP_RESULT_LEN>;
   using DyadicFloat = DyadicFloat<TMP_RESULT_LEN>;
 
+  InFPBits x_bits(x), y_bits(y), z_bits(z);
+
+  if (LIBC_UNLIKELY(x_bits.is_nan() || y_bits.is_nan() || z_bits.is_nan())) {
+    if (x_bits.is_nan() || y_bits.is_nan() || z_bits.is_nan()) {
+      if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan() ||
+          z_bits.is_signaling_nan())
+        raise_except_if_required(FE_INVALID);
+
+      if (x_bits.is_quiet_nan()) {
+        InStorageType x_payload = static_cast<InStorageType>(getpayload(x));
+        if ((x_payload & ~(OutFPBits::FRACTION_MASK >> 1)) == 0)
+          return OutFPBits::quiet_nan(x_bits.sign(),
+                                      static_cast<OutStorageType>(x_payload))
+              .get_val();
+      }
+
+      if (y_bits.is_quiet_nan()) {
+        InStorageType y_payload = static_cast<InStorageType>(getpayload(y));
+        if ((y_payload & ~(OutFPBits::FRACTION_MASK >> 1)) == 0)
+          return OutFPBits::quiet_nan(y_bits.sign(),
+                                      static_cast<OutStorageType>(y_payload))
+              .get_val();
+      }
+
+      if (z_bits.is_quiet_nan()) {
+        InStorageType z_payload = static_cast<InStorageType>(getpayload(z));
+        if ((z_payload & ~(OutFPBits::FRACTION_MASK >> 1)) == 0)
+          return OutFPBits::quiet_nan(z_bits.sign(),
+                                      static_cast<OutStorageType>(z_payload))
+              .get_val();
+      }
+
+      return OutFPBits::quiet_nan().get_val();
+    }
+  }
+
   if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0))
     return static_cast<OutType>(x * y + z);
 
@@ -137,7 +176,9 @@ fma(InType x, InType y, InType z) {
     z *= InType(InStorageType(1) << InFPBits::FRACTION_LEN);
   }
 
-  InFPBits x_bits(x), y_bits(y), z_bits(z);
+  x_bits = InFPBits(x);
+  y_bits = InFPBits(y);
+  z_bits = InFPBits(z);
   const Sign z_sign = z_bits.sign();
   Sign prod_sign = (x_bits.sign() == y_bits.sign()) ? Sign::POS : Sign::NEG;
   x_exp += x_bits.get_biased_exponent();
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index d1c814b6bf18f..97ce0f63b57fb 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -979,6 +979,10 @@ template void explain_ternary_operation_one_output_error(
     Operation, const TernaryInput<float> &, float16, double, RoundingMode);
 template void explain_ternary_operation_one_output_error(
     Operation, const TernaryInput<double> &, float16, double, RoundingMode);
+template void
+explain_ternary_operation_one_output_error(Operation,
+                                           const TernaryInput<long double> &,
+                                           float16, double, RoundingMode);
 #endif
 
 template <typename InputType, typename OutputType>
@@ -1130,6 +1134,10 @@ template bool compare_ternary_operation_one_output(Operation,
                                                    const TernaryInput<double> &,
                                                    float16, double,
                                                    RoundingMode);
+template bool
+compare_ternary_operation_one_output(Operation,
+                                     const TernaryInput<long double> &, float16,
+                                     double, RoundingMode);
 #endif
 
 } // namespace internal