[libc-commits] [libc] [libc][math][c23] Add f16fmaf C23 math function (PR #95483)
via libc-commits
libc-commits at lists.llvm.org
Thu Jun 13 15:59:22 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
Author: OverMighty (overmighty)
<details>
<summary>Changes</summary>
Part of #<!-- -->93566.
---
Patch is 54.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95483.diff
27 Files Affected:
- (modified) libc/config/linux/aarch64/entrypoints.txt (+1)
- (modified) libc/config/linux/x86_64/entrypoints.txt (+1)
- (modified) libc/docs/math/index.rst (+2)
- (modified) libc/spec/stdc.td (+2)
- (modified) libc/src/__support/FPUtil/FMA.h (+10-22)
- (modified) libc/src/__support/FPUtil/generic/FMA.h (+86-59)
- (modified) libc/src/__support/FPUtil/multiply_add.h (+2-2)
- (modified) libc/src/__support/big_int.h (+16)
- (modified) libc/src/math/CMakeLists.txt (+2)
- (added) libc/src/math/f16fmaf.h (+20)
- (modified) libc/src/math/generic/CMakeLists.txt (+12)
- (modified) libc/src/math/generic/expm1f.cpp (+1-1)
- (added) libc/src/math/generic/f16fmaf.cpp (+19)
- (modified) libc/src/math/generic/fma.cpp (+1-1)
- (modified) libc/src/math/generic/fmaf.cpp (+1-1)
- (modified) libc/src/math/generic/range_reduction_fma.h (+13-12)
- (modified) libc/test/src/math/CMakeLists.txt (+19-2)
- (modified) libc/test/src/math/FmaTest.h (+88-59)
- (added) libc/test/src/math/f16fmaf_test.cpp (+25)
- (modified) libc/test/src/math/smoke/CMakeLists.txt (+16-2)
- (modified) libc/test/src/math/smoke/FmaTest.h (+65-31)
- (added) libc/test/src/math/smoke/f16fmaf_test.cpp (+13)
- (modified) libc/test/src/math/smoke/fma_test.cpp (+1-5)
- (modified) libc/test/src/math/smoke/fmaf_test.cpp (+1-5)
- (modified) libc/test/src/sched/CMakeLists.txt (+17-17)
- (modified) libc/utils/MPFRWrapper/MPFRUtils.cpp (+46-32)
- (modified) libc/utils/MPFRWrapper/MPFRUtils.h (+33-15)
``````````diff
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 2b2d0985a8992..dab747ca0ac74 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -503,6 +503,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.canonicalizef16
libc.src.math.ceilf16
libc.src.math.copysignf16
+ libc.src.math.f16fmaf
libc.src.math.f16sqrtf
libc.src.math.fabsf16
libc.src.math.fdimf16
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 2d36ca296c3a4..45914fe9f7ad2 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -535,6 +535,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.canonicalizef16
libc.src.math.ceilf16
libc.src.math.copysignf16
+ libc.src.math.f16fmaf
libc.src.math.f16sqrtf
libc.src.math.fabsf16
libc.src.math.fdimf16
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index 790786147c164..293edd1c15100 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -124,6 +124,8 @@ Basic Operations
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| dsub | N/A | N/A | | N/A | | 7.12.14.2 | F.10.11 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
+| f16fma | |check| | | | N/A | | 7.12.14.5 | F.10.11 |
++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| fabs | |check| | |check| | |check| | |check| | |check| | 7.12.7.3 | F.10.4.3 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| fadd | N/A | | | N/A | | 7.12.14.1 | F.10.11 |
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 7c4135032a0b2..b089b596b0958 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -715,6 +715,8 @@ def StdC : StandardSpec<"stdc"> {
GuardedFunctionSpec<"totalordermagf16", RetValSpec<IntType>, [ArgSpec<Float16Ptr>, ArgSpec<Float16Ptr>], "LIBC_TYPES_HAS_FLOAT16">,
+ GuardedFunctionSpec<"f16fmaf", RetValSpec<Float16Type>, [ArgSpec<FloatType>, ArgSpec<FloatType>, ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
+
GuardedFunctionSpec<"f16sqrtf", RetValSpec<Float16Type>, [ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
]
>;
diff --git a/libc/src/__support/FPUtil/FMA.h b/libc/src/__support/FPUtil/FMA.h
index c277da49538bf..cf01a317d7359 100644
--- a/libc/src/__support/FPUtil/FMA.h
+++ b/libc/src/__support/FPUtil/FMA.h
@@ -10,41 +10,29 @@
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_FMA_H
#include "src/__support/CPP/type_traits.h"
+#include "src/__support/FPUtil/generic/FMA.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
-#if defined(LIBC_TARGET_CPU_HAS_FMA)
-
namespace LIBC_NAMESPACE {
namespace fputil {
-template <typename T>
-LIBC_INLINE cpp::enable_if_t<cpp::is_same_v<T, float>, T> fma(T x, T y, T z) {
- return __builtin_fmaf(x, y, z);
+template <typename OutType, typename InType>
+LIBC_INLINE OutType fma(InType x, InType y, InType z) {
+ return generic::fma<OutType>(x, y, z);
}
-template <typename T>
-LIBC_INLINE cpp::enable_if_t<cpp::is_same_v<T, double>, T> fma(T x, T y, T z) {
- return __builtin_fma(x, y, z);
+#ifdef LIBC_TARGET_CPU_HAS_FMA
+template <> LIBC_INLINE float fma(float x, float y, float z) {
+ return __builtin_fmaf(x, y, z);
}
-} // namespace fputil
-} // namespace LIBC_NAMESPACE
-
-#else
-// FMA instructions are not available
-#include "generic/FMA.h"
-
-namespace LIBC_NAMESPACE {
-namespace fputil {
-
-template <typename T> LIBC_INLINE T fma(T x, T y, T z) {
- return generic::fma(x, y, z);
+template <> LIBC_INLINE double fma(double x, double y, double z) {
+ return __builtin_fma(x, y, z);
}
+#endif // LIBC_TARGET_CPU_HAS_FMA
} // namespace fputil
} // namespace LIBC_NAMESPACE
-#endif
-
#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_FMA_H
diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h
index f403aa7333b39..3bbb30476e5d8 100644
--- a/libc/src/__support/FPUtil/generic/FMA.h
+++ b/libc/src/__support/FPUtil/generic/FMA.h
@@ -10,20 +10,28 @@
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_FMA_H
#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/limits.h"
#include "src/__support/CPP/type_traits.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/big_int.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/__support/uint128.h"
+
+#include "hdr/fenv_macros.h"
namespace LIBC_NAMESPACE {
namespace fputil {
namespace generic {
-template <typename T> LIBC_INLINE T fma(T x, T y, T z);
+template <typename OutType, typename InType>
+LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
+ cpp::is_floating_point_v<InType> &&
+ sizeof(OutType) <= sizeof(InType),
+ OutType>
+fma(InType x, InType y, InType z);
+#ifndef LIBC_TARGET_CPU_HAS_FMA
// TODO(lntue): Implement fmaf that is correctly rounded to all rounding modes.
// The implementation below only is only correct for the default rounding mode,
// round-to-nearest tie-to-even.
@@ -74,17 +82,20 @@ template <> LIBC_INLINE float fma<float>(float x, float y, float z) {
return static_cast<float>(bit_sum.get_val());
}
+#endif // LIBC_TARGET_CPU_HAS_FMA
namespace internal {
// Extract the sticky bits and shift the `mantissa` to the right by
// `shift_length`.
-LIBC_INLINE bool shift_mantissa(int shift_length, UInt128 &mant) {
- if (shift_length >= 128) {
+template <typename T>
+LIBC_INLINE cpp::enable_if_t<is_unsigned_integral_or_big_int_v<T>, bool>
+shift_mantissa(int shift_length, T &mant) {
+ if (shift_length >= cpp::numeric_limits<T>::digits) {
mant = 0;
return true; // prod_mant is non-zero.
}
- UInt128 mask = (UInt128(1) << shift_length) - 1;
+ T mask = (T(1) << shift_length) - 1;
bool sticky_bits = (mant & mask) != 0;
mant >>= shift_length;
return sticky_bits;
@@ -92,11 +103,29 @@ LIBC_INLINE bool shift_mantissa(int shift_length, UInt128 &mant) {
} // namespace internal
-template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
- using FPBits = fputil::FPBits<double>;
+template <typename OutType, typename InType>
+LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
+ cpp::is_floating_point_v<InType> &&
+ sizeof(OutType) <= sizeof(InType),
+ OutType>
+fma(InType x, InType y, InType z) {
+ using OutFPBits = fputil::FPBits<OutType>;
+ using OutStorageType = typename OutFPBits::StorageType;
+ using InFPBits = fputil::FPBits<InType>;
+ using InStorageType = typename InFPBits::StorageType;
+
+ constexpr int IN_EXPLICIT_MANT_LEN = InFPBits::FRACTION_LEN + 1;
+ constexpr size_t PROD_LEN = 2 * (IN_EXPLICIT_MANT_LEN);
+ constexpr size_t TMP_RESULT_LEN = cpp::bit_ceil(PROD_LEN + 1);
+ using TmpResultType = UInt<TMP_RESULT_LEN>;
+
+ constexpr size_t EXTRA_FRACTION_LEN =
+ TMP_RESULT_LEN - 1 - OutFPBits::FRACTION_LEN;
+ constexpr TmpResultType EXTRA_FRACTION_STICKY_MASK =
+ (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1)) - 1;
if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0)) {
- return x * y + z;
+ return static_cast<OutType>(x * y + z);
}
int x_exp = 0;
@@ -104,35 +133,35 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
int z_exp = 0;
// Normalize denormal inputs.
- if (LIBC_UNLIKELY(FPBits(x).is_subnormal())) {
- x_exp -= 52;
- x *= 0x1.0p+52;
+ if (LIBC_UNLIKELY(InFPBits(x).is_subnormal())) {
+ x_exp -= InFPBits::FRACTION_LEN;
+ x *= InType(InStorageType(1) << InFPBits::FRACTION_LEN);
}
- if (LIBC_UNLIKELY(FPBits(y).is_subnormal())) {
- y_exp -= 52;
- y *= 0x1.0p+52;
+ if (LIBC_UNLIKELY(InFPBits(y).is_subnormal())) {
+ y_exp -= InFPBits::FRACTION_LEN;
+ y *= InType(InStorageType(1) << InFPBits::FRACTION_LEN);
}
- if (LIBC_UNLIKELY(FPBits(z).is_subnormal())) {
- z_exp -= 52;
- z *= 0x1.0p+52;
+ if (LIBC_UNLIKELY(InFPBits(z).is_subnormal())) {
+ z_exp -= InFPBits::FRACTION_LEN;
+ z *= InType(InStorageType(1) << InFPBits::FRACTION_LEN);
}
- FPBits x_bits(x), y_bits(y), z_bits(z);
+ InFPBits x_bits(x), y_bits(y), z_bits(z);
const Sign z_sign = z_bits.sign();
Sign prod_sign = (x_bits.sign() == y_bits.sign()) ? Sign::POS : Sign::NEG;
x_exp += x_bits.get_biased_exponent();
y_exp += y_bits.get_biased_exponent();
z_exp += z_bits.get_biased_exponent();
- if (LIBC_UNLIKELY(x_exp == FPBits::MAX_BIASED_EXPONENT ||
- y_exp == FPBits::MAX_BIASED_EXPONENT ||
- z_exp == FPBits::MAX_BIASED_EXPONENT))
- return x * y + z;
+ if (LIBC_UNLIKELY(x_exp == InFPBits::MAX_BIASED_EXPONENT ||
+ y_exp == InFPBits::MAX_BIASED_EXPONENT ||
+ z_exp == InFPBits::MAX_BIASED_EXPONENT))
+ return static_cast<OutType>(x * y + z);
// Extract mantissa and append hidden leading bits.
- UInt128 x_mant = x_bits.get_explicit_mantissa();
- UInt128 y_mant = y_bits.get_explicit_mantissa();
- UInt128 z_mant = z_bits.get_explicit_mantissa();
+ InStorageType x_mant = x_bits.get_explicit_mantissa();
+ InStorageType y_mant = y_bits.get_explicit_mantissa();
+ TmpResultType z_mant = z_bits.get_explicit_mantissa();
// If the exponent of the product x*y > the exponent of z, then no extra
// precision beside the entire product x*y is needed. On the other hand, when
@@ -144,21 +173,24 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
// - prod : 1bb...bb....b
// In that case, in order to store the exact result, we need at least
// (Length of prod) - (MantissaLength of z) = 2*(52 + 1) - 52 = 54.
+ // TODO: 53? (Explicit mantissa.) ^
// Overall, before aligning the mantissas and exponents, we can simply left-
// shift the mantissa of z by at least 54, and left-shift the product of x*y
// by (that amount - 52). After that, it is enough to align the least
+ // TODO: ^ 54?
// significant bit, given that we keep track of the round and sticky bits
// after the least significant bit.
// We pick shifting z_mant by 64 bits so that technically we can simply use
// the original mantissa as high part when constructing 128-bit z_mant. So the
// mantissa of prod will be left-shifted by 64 - 54 = 10 initially.
- UInt128 prod_mant = x_mant * y_mant << 10;
+ TmpResultType prod_mant = TmpResultType(x_mant) * y_mant;
int prod_lsb_exp =
- x_exp + y_exp - (FPBits::EXP_BIAS + 2 * FPBits::FRACTION_LEN + 10);
+ x_exp + y_exp - (InFPBits::EXP_BIAS + 2 * InFPBits::FRACTION_LEN);
- z_mant <<= 64;
- int z_lsb_exp = z_exp - (FPBits::FRACTION_LEN + 64);
+ constexpr int RESULT_MIN_LEN = PROD_LEN - InFPBits::FRACTION_LEN;
+ z_mant <<= RESULT_MIN_LEN;
+ int z_lsb_exp = z_exp - (InFPBits::FRACTION_LEN + RESULT_MIN_LEN);
bool round_bit = false;
bool sticky_bits = false;
bool z_shifted = false;
@@ -198,46 +230,40 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
}
}
- uint64_t result = 0;
+ OutStorageType result = 0;
int r_exp = 0; // Unbiased exponent of the result
// Normalize the result.
if (prod_mant != 0) {
- uint64_t prod_hi = static_cast<uint64_t>(prod_mant >> 64);
- int lead_zeros =
- prod_hi ? cpp::countl_zero(prod_hi)
- : 64 + cpp::countl_zero(static_cast<uint64_t>(prod_mant));
+ int lead_zeros = cpp::countl_zero(prod_mant);
// Move the leading 1 to the most significant bit.
prod_mant <<= lead_zeros;
- // The lower 64 bits are always sticky bits after moving the leading 1 to
- // the most significant bit.
- sticky_bits |= (static_cast<uint64_t>(prod_mant) != 0);
- result = static_cast<uint64_t>(prod_mant >> 64);
- // Change prod_lsb_exp the be the exponent of the least significant bit of
- // the result.
- prod_lsb_exp += 64 - lead_zeros;
- r_exp = prod_lsb_exp + 63;
+ prod_lsb_exp -= lead_zeros;
+ r_exp = prod_lsb_exp + (cpp::numeric_limits<TmpResultType>::digits - 1) -
+ InFPBits::EXP_BIAS + OutFPBits::EXP_BIAS;
if (r_exp > 0) {
// The result is normal. We will shift the mantissa to the right by
// 63 - 52 = 11 bits (from the locations of the most significant bit).
// Then the rounding bit will correspond the 11th bit, and the lowest
// 10 bits are merged into sticky bits.
- round_bit = (result & 0x0400ULL) != 0;
- sticky_bits |= (result & 0x03ffULL) != 0;
- result >>= 11;
+ round_bit =
+ (prod_mant & (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1))) != 0;
+ sticky_bits |= (prod_mant & EXTRA_FRACTION_STICKY_MASK) != 0;
+ result = static_cast<OutStorageType>(prod_mant >> EXTRA_FRACTION_LEN);
} else {
- if (r_exp < -52) {
+ if (r_exp < -OutFPBits::FRACTION_LEN) {
// The result is smaller than 1/2 of the smallest denormal number.
sticky_bits = true; // since the result is non-zero.
result = 0;
} else {
// The result is denormal.
- uint64_t mask = 1ULL << (11 - r_exp);
- round_bit = (result & mask) != 0;
- sticky_bits |= (result & (mask - 1)) != 0;
- if (r_exp > -52)
- result >>= 12 - r_exp;
+ TmpResultType mask = TmpResultType(1) << (EXTRA_FRACTION_LEN - r_exp);
+ round_bit = (prod_mant & mask) != 0;
+ sticky_bits |= (prod_mant & (mask - 1)) != 0;
+ if (r_exp > -OutFPBits::FRACTION_LEN)
+ result = static_cast<OutStorageType>(
+ prod_mant >> (EXTRA_FRACTION_LEN + 1 - r_exp));
else
result = 0;
}
@@ -251,20 +277,21 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
// Finalize the result.
int round_mode = fputil::quick_get_round();
- if (LIBC_UNLIKELY(r_exp >= FPBits::MAX_BIASED_EXPONENT)) {
+ if (LIBC_UNLIKELY(r_exp >= OutFPBits::MAX_BIASED_EXPONENT)) {
if ((round_mode == FE_TOWARDZERO) ||
(round_mode == FE_UPWARD && prod_sign.is_neg()) ||
(round_mode == FE_DOWNWARD && prod_sign.is_pos())) {
- return FPBits::max_normal(prod_sign).get_val();
+ return OutFPBits::max_normal(prod_sign).get_val();
}
- return FPBits::inf(prod_sign).get_val();
+ return OutFPBits::inf(prod_sign).get_val();
}
// Remove hidden bit and append the exponent field and sign bit.
- result = (result & FPBits::FRACTION_MASK) |
- (static_cast<uint64_t>(r_exp) << FPBits::FRACTION_LEN);
+ result = static_cast<OutStorageType>(
+ (result & OutFPBits::FRACTION_MASK) |
+ (static_cast<OutStorageType>(r_exp) << OutFPBits::FRACTION_LEN));
if (prod_sign.is_neg()) {
- result |= FPBits::SIGN_MASK;
+ result |= OutFPBits::SIGN_MASK;
}
// Rounding.
@@ -277,7 +304,7 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
++result;
}
- return cpp::bit_cast<double>(result);
+ return cpp::bit_cast<OutType>(result);
}
} // namespace generic
diff --git a/libc/src/__support/FPUtil/multiply_add.h b/libc/src/__support/FPUtil/multiply_add.h
index 82932da5417c8..622914e4265c9 100644
--- a/libc/src/__support/FPUtil/multiply_add.h
+++ b/libc/src/__support/FPUtil/multiply_add.h
@@ -45,11 +45,11 @@ namespace LIBC_NAMESPACE {
namespace fputil {
LIBC_INLINE float multiply_add(float x, float y, float z) {
- return fma(x, y, z);
+ return fma<float>(x, y, z);
}
LIBC_INLINE double multiply_add(double x, double y, double z) {
- return fma(x, y, z);
+ return fma<double>(x, y, z);
}
} // namespace fputil
diff --git a/libc/src/__support/big_int.h b/libc/src/__support/big_int.h
index 40ad6eeed7ac2..27cb3b783470c 100644
--- a/libc/src/__support/big_int.h
+++ b/libc/src/__support/big_int.h
@@ -983,6 +983,12 @@ using UInt = BigInt<Bits, false, internal::WordTypeSelectorT<Bits>>;
template <size_t Bits>
using Int = BigInt<Bits, true, internal::WordTypeSelectorT<Bits>>;
+// Provides limits of BigInt.
+template <size_t Bits, bool Signed, typename T>
+struct cpp::numeric_limits<BigInt<Bits, Signed, T>> {
+ LIBC_INLINE_VAR static constexpr int digits = Bits - Signed;
+};
+
// Provides limits of U/Int<128>.
template <> class cpp::numeric_limits<UInt<128>> {
public:
@@ -1073,6 +1079,16 @@ template <typename T>
using make_integral_or_big_int_signed_t =
typename make_integral_or_big_int_signed<T>::type;
+// is_unsigned_integral_or_big_int
+template <typename T>
+struct is_unsigned_integral_or_big_int
+ : cpp::bool_constant<
+ cpp::is_same_v<T, make_integral_or_big_int_unsigned_t<T>>> {};
+
+template <typename T>
+LIBC_INLINE_VAR constexpr bool is_unsigned_integral_or_big_int_v =
+ is_unsigned_integral_or_big_int<T>::value;
+
namespace cpp {
// Specialization of cpp::bit_cast ('bit.h') from T to BigInt.
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index df8e6c0b253da..4472367d6c073 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -99,6 +99,8 @@ add_math_entrypoint_object(exp10f)
add_math_entrypoint_object(expm1)
add_math_entrypoint_object(expm1f)
+add_math_entrypoint_object(f16fmaf)
+
add_math_entrypoint_object(f16sqrtf)
add_math_entrypoint_object(fabs)
diff --git a/libc/src/math/f16fmaf.h b/libc/src/math/f16fmaf.h
new file mode 100644
index 0000000000000..d92cb43c292eb
--- /dev/null
+++ b/libc/src/math/f16fmaf.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for f16fmaf -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_F16FMAF_H
+#define LLVM_LIBC_SRC_MATH_F16FMAF_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 f16fmaf(float x, float y, float z);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_F16FMAF_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index f1f7d6c367be2..706bfc4b08670 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3602,6 +3602,18 @@ add_entrypoint_object(
-O3
)
+add_entrypoint_object(
+ f16fmaf
+ SRCS
+ f16fmaf.cpp
+ HDRS
+ ../f16fmaf.h
+ DEPENDS
+ libc.src.__support.FPUtil.fma
+ COMPILE_OPTIONS
+ -O3
+)
+
add_entrypoint_object(
f16sqrtf
SRCS
diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp
index 037e60021b296..6b9f07476a650 100644
--- a/libc/src/math/generic/expm1f.cpp
+++ b/libc/src/math/generic/expm1f.cpp
@@ -104,7 +104,7 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
// intermediate results as it is more efficient than using an emulated
// version of...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/95483
More information about the libc-commits
mailing list