[libc-commits] [libc] [libc][math][c23] Add f16fmaf C23 math function (PR #95483)
via libc-commits
libc-commits at lists.llvm.org
Fri Jun 14 08:56:49 PDT 2024
https://github.com/overmighty updated https://github.com/llvm/llvm-project/pull/95483
>From 0f9a842b067f0ce4868c68219b0364ae8863935a Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Fri, 14 Jun 2024 00:57:03 +0200
Subject: [PATCH 01/11] [libc][math][c23] Add f16fmaf C23 math function
---
libc/config/linux/aarch64/entrypoints.txt | 1 +
libc/config/linux/x86_64/entrypoints.txt | 1 +
libc/docs/math/index.rst | 2 +
libc/spec/stdc.td | 2 +
libc/src/__support/FPUtil/FMA.h | 32 ++---
libc/src/__support/FPUtil/generic/FMA.h | 145 +++++++++++--------
libc/src/__support/FPUtil/multiply_add.h | 4 +-
libc/src/__support/big_int.h | 16 +++
libc/src/math/CMakeLists.txt | 2 +
libc/src/math/f16fmaf.h | 20 +++
libc/src/math/generic/CMakeLists.txt | 12 ++
libc/src/math/generic/expm1f.cpp | 2 +-
libc/src/math/generic/f16fmaf.cpp | 19 +++
libc/src/math/generic/fma.cpp | 2 +-
libc/src/math/generic/fmaf.cpp | 2 +-
libc/src/math/generic/range_reduction_fma.h | 25 ++--
libc/test/src/math/CMakeLists.txt | 21 ++-
libc/test/src/math/FmaTest.h | 147 ++++++++++++--------
libc/test/src/math/f16fmaf_test.cpp | 25 ++++
libc/test/src/math/smoke/CMakeLists.txt | 18 ++-
libc/test/src/math/smoke/FmaTest.h | 96 ++++++++-----
libc/test/src/math/smoke/f16fmaf_test.cpp | 13 ++
libc/test/src/math/smoke/fma_test.cpp | 6 +-
libc/test/src/math/smoke/fmaf_test.cpp | 6 +-
libc/test/src/sched/CMakeLists.txt | 34 ++---
libc/utils/MPFRWrapper/MPFRUtils.cpp | 78 ++++++-----
libc/utils/MPFRWrapper/MPFRUtils.h | 48 +++++--
27 files changed, 513 insertions(+), 266 deletions(-)
create mode 100644 libc/src/math/f16fmaf.h
create mode 100644 libc/src/math/generic/f16fmaf.cpp
create mode 100644 libc/test/src/math/f16fmaf_test.cpp
create mode 100644 libc/test/src/math/smoke/f16fmaf_test.cpp
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 2b2d0985a8992..dab747ca0ac74 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -503,6 +503,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.canonicalizef16
libc.src.math.ceilf16
libc.src.math.copysignf16
+ libc.src.math.f16fmaf
libc.src.math.f16sqrtf
libc.src.math.fabsf16
libc.src.math.fdimf16
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 2d36ca296c3a4..45914fe9f7ad2 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -535,6 +535,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.canonicalizef16
libc.src.math.ceilf16
libc.src.math.copysignf16
+ libc.src.math.f16fmaf
libc.src.math.f16sqrtf
libc.src.math.fabsf16
libc.src.math.fdimf16
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index 790786147c164..293edd1c15100 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -124,6 +124,8 @@ Basic Operations
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| dsub | N/A | N/A | | N/A | | 7.12.14.2 | F.10.11 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
+| f16fma | |check| | | | N/A | | 7.12.14.5 | F.10.11 |
++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| fabs | |check| | |check| | |check| | |check| | |check| | 7.12.7.3 | F.10.4.3 |
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| fadd | N/A | | | N/A | | 7.12.14.1 | F.10.11 |
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 7c4135032a0b2..b089b596b0958 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -715,6 +715,8 @@ def StdC : StandardSpec<"stdc"> {
GuardedFunctionSpec<"totalordermagf16", RetValSpec<IntType>, [ArgSpec<Float16Ptr>, ArgSpec<Float16Ptr>], "LIBC_TYPES_HAS_FLOAT16">,
+ GuardedFunctionSpec<"f16fmaf", RetValSpec<Float16Type>, [ArgSpec<FloatType>, ArgSpec<FloatType>, ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
+
GuardedFunctionSpec<"f16sqrtf", RetValSpec<Float16Type>, [ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
]
>;
diff --git a/libc/src/__support/FPUtil/FMA.h b/libc/src/__support/FPUtil/FMA.h
index c277da49538bf..cf01a317d7359 100644
--- a/libc/src/__support/FPUtil/FMA.h
+++ b/libc/src/__support/FPUtil/FMA.h
@@ -10,41 +10,29 @@
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_FMA_H
#include "src/__support/CPP/type_traits.h"
+#include "src/__support/FPUtil/generic/FMA.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
-#if defined(LIBC_TARGET_CPU_HAS_FMA)
-
namespace LIBC_NAMESPACE {
namespace fputil {
-template <typename T>
-LIBC_INLINE cpp::enable_if_t<cpp::is_same_v<T, float>, T> fma(T x, T y, T z) {
- return __builtin_fmaf(x, y, z);
+template <typename OutType, typename InType>
+LIBC_INLINE OutType fma(InType x, InType y, InType z) {
+ return generic::fma<OutType>(x, y, z);
}
-template <typename T>
-LIBC_INLINE cpp::enable_if_t<cpp::is_same_v<T, double>, T> fma(T x, T y, T z) {
- return __builtin_fma(x, y, z);
+#ifdef LIBC_TARGET_CPU_HAS_FMA
+template <> LIBC_INLINE float fma(float x, float y, float z) {
+ return __builtin_fmaf(x, y, z);
}
-} // namespace fputil
-} // namespace LIBC_NAMESPACE
-
-#else
-// FMA instructions are not available
-#include "generic/FMA.h"
-
-namespace LIBC_NAMESPACE {
-namespace fputil {
-
-template <typename T> LIBC_INLINE T fma(T x, T y, T z) {
- return generic::fma(x, y, z);
+template <> LIBC_INLINE double fma(double x, double y, double z) {
+ return __builtin_fma(x, y, z);
}
+#endif // LIBC_TARGET_CPU_HAS_FMA
} // namespace fputil
} // namespace LIBC_NAMESPACE
-#endif
-
#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_FMA_H
diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h
index f403aa7333b39..3bbb30476e5d8 100644
--- a/libc/src/__support/FPUtil/generic/FMA.h
+++ b/libc/src/__support/FPUtil/generic/FMA.h
@@ -10,20 +10,28 @@
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_FMA_H
#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/limits.h"
#include "src/__support/CPP/type_traits.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/big_int.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/__support/uint128.h"
+
+#include "hdr/fenv_macros.h"
namespace LIBC_NAMESPACE {
namespace fputil {
namespace generic {
-template <typename T> LIBC_INLINE T fma(T x, T y, T z);
+template <typename OutType, typename InType>
+LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
+ cpp::is_floating_point_v<InType> &&
+ sizeof(OutType) <= sizeof(InType),
+ OutType>
+fma(InType x, InType y, InType z);
+#ifndef LIBC_TARGET_CPU_HAS_FMA
// TODO(lntue): Implement fmaf that is correctly rounded to all rounding modes.
// The implementation below only is only correct for the default rounding mode,
// round-to-nearest tie-to-even.
@@ -74,17 +82,20 @@ template <> LIBC_INLINE float fma<float>(float x, float y, float z) {
return static_cast<float>(bit_sum.get_val());
}
+#endif // LIBC_TARGET_CPU_HAS_FMA
namespace internal {
// Extract the sticky bits and shift the `mantissa` to the right by
// `shift_length`.
-LIBC_INLINE bool shift_mantissa(int shift_length, UInt128 &mant) {
- if (shift_length >= 128) {
+template <typename T>
+LIBC_INLINE cpp::enable_if_t<is_unsigned_integral_or_big_int_v<T>, bool>
+shift_mantissa(int shift_length, T &mant) {
+ if (shift_length >= cpp::numeric_limits<T>::digits) {
mant = 0;
return true; // prod_mant is non-zero.
}
- UInt128 mask = (UInt128(1) << shift_length) - 1;
+ T mask = (T(1) << shift_length) - 1;
bool sticky_bits = (mant & mask) != 0;
mant >>= shift_length;
return sticky_bits;
@@ -92,11 +103,29 @@ LIBC_INLINE bool shift_mantissa(int shift_length, UInt128 &mant) {
} // namespace internal
-template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
- using FPBits = fputil::FPBits<double>;
+template <typename OutType, typename InType>
+LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
+ cpp::is_floating_point_v<InType> &&
+ sizeof(OutType) <= sizeof(InType),
+ OutType>
+fma(InType x, InType y, InType z) {
+ using OutFPBits = fputil::FPBits<OutType>;
+ using OutStorageType = typename OutFPBits::StorageType;
+ using InFPBits = fputil::FPBits<InType>;
+ using InStorageType = typename InFPBits::StorageType;
+
+ constexpr int IN_EXPLICIT_MANT_LEN = InFPBits::FRACTION_LEN + 1;
+ constexpr size_t PROD_LEN = 2 * (IN_EXPLICIT_MANT_LEN);
+ constexpr size_t TMP_RESULT_LEN = cpp::bit_ceil(PROD_LEN + 1);
+ using TmpResultType = UInt<TMP_RESULT_LEN>;
+
+ constexpr size_t EXTRA_FRACTION_LEN =
+ TMP_RESULT_LEN - 1 - OutFPBits::FRACTION_LEN;
+ constexpr TmpResultType EXTRA_FRACTION_STICKY_MASK =
+ (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1)) - 1;
if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0)) {
- return x * y + z;
+ return static_cast<OutType>(x * y + z);
}
int x_exp = 0;
@@ -104,35 +133,35 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
int z_exp = 0;
// Normalize denormal inputs.
- if (LIBC_UNLIKELY(FPBits(x).is_subnormal())) {
- x_exp -= 52;
- x *= 0x1.0p+52;
+ if (LIBC_UNLIKELY(InFPBits(x).is_subnormal())) {
+ x_exp -= InFPBits::FRACTION_LEN;
+ x *= InType(InStorageType(1) << InFPBits::FRACTION_LEN);
}
- if (LIBC_UNLIKELY(FPBits(y).is_subnormal())) {
- y_exp -= 52;
- y *= 0x1.0p+52;
+ if (LIBC_UNLIKELY(InFPBits(y).is_subnormal())) {
+ y_exp -= InFPBits::FRACTION_LEN;
+ y *= InType(InStorageType(1) << InFPBits::FRACTION_LEN);
}
- if (LIBC_UNLIKELY(FPBits(z).is_subnormal())) {
- z_exp -= 52;
- z *= 0x1.0p+52;
+ if (LIBC_UNLIKELY(InFPBits(z).is_subnormal())) {
+ z_exp -= InFPBits::FRACTION_LEN;
+ z *= InType(InStorageType(1) << InFPBits::FRACTION_LEN);
}
- FPBits x_bits(x), y_bits(y), z_bits(z);
+ InFPBits x_bits(x), y_bits(y), z_bits(z);
const Sign z_sign = z_bits.sign();
Sign prod_sign = (x_bits.sign() == y_bits.sign()) ? Sign::POS : Sign::NEG;
x_exp += x_bits.get_biased_exponent();
y_exp += y_bits.get_biased_exponent();
z_exp += z_bits.get_biased_exponent();
- if (LIBC_UNLIKELY(x_exp == FPBits::MAX_BIASED_EXPONENT ||
- y_exp == FPBits::MAX_BIASED_EXPONENT ||
- z_exp == FPBits::MAX_BIASED_EXPONENT))
- return x * y + z;
+ if (LIBC_UNLIKELY(x_exp == InFPBits::MAX_BIASED_EXPONENT ||
+ y_exp == InFPBits::MAX_BIASED_EXPONENT ||
+ z_exp == InFPBits::MAX_BIASED_EXPONENT))
+ return static_cast<OutType>(x * y + z);
// Extract mantissa and append hidden leading bits.
- UInt128 x_mant = x_bits.get_explicit_mantissa();
- UInt128 y_mant = y_bits.get_explicit_mantissa();
- UInt128 z_mant = z_bits.get_explicit_mantissa();
+ InStorageType x_mant = x_bits.get_explicit_mantissa();
+ InStorageType y_mant = y_bits.get_explicit_mantissa();
+ TmpResultType z_mant = z_bits.get_explicit_mantissa();
// If the exponent of the product x*y > the exponent of z, then no extra
// precision beside the entire product x*y is needed. On the other hand, when
@@ -144,21 +173,24 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
// - prod : 1bb...bb....b
// In that case, in order to store the exact result, we need at least
// (Length of prod) - (MantissaLength of z) = 2*(52 + 1) - 52 = 54.
+ // TODO: 53? (Explicit mantissa.) ^
// Overall, before aligning the mantissas and exponents, we can simply left-
// shift the mantissa of z by at least 54, and left-shift the product of x*y
// by (that amount - 52). After that, it is enough to align the least
+ // TODO: ^ 54?
// significant bit, given that we keep track of the round and sticky bits
// after the least significant bit.
// We pick shifting z_mant by 64 bits so that technically we can simply use
// the original mantissa as high part when constructing 128-bit z_mant. So the
// mantissa of prod will be left-shifted by 64 - 54 = 10 initially.
- UInt128 prod_mant = x_mant * y_mant << 10;
+ TmpResultType prod_mant = TmpResultType(x_mant) * y_mant;
int prod_lsb_exp =
- x_exp + y_exp - (FPBits::EXP_BIAS + 2 * FPBits::FRACTION_LEN + 10);
+ x_exp + y_exp - (InFPBits::EXP_BIAS + 2 * InFPBits::FRACTION_LEN);
- z_mant <<= 64;
- int z_lsb_exp = z_exp - (FPBits::FRACTION_LEN + 64);
+ constexpr int RESULT_MIN_LEN = PROD_LEN - InFPBits::FRACTION_LEN;
+ z_mant <<= RESULT_MIN_LEN;
+ int z_lsb_exp = z_exp - (InFPBits::FRACTION_LEN + RESULT_MIN_LEN);
bool round_bit = false;
bool sticky_bits = false;
bool z_shifted = false;
@@ -198,46 +230,40 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
}
}
- uint64_t result = 0;
+ OutStorageType result = 0;
int r_exp = 0; // Unbiased exponent of the result
// Normalize the result.
if (prod_mant != 0) {
- uint64_t prod_hi = static_cast<uint64_t>(prod_mant >> 64);
- int lead_zeros =
- prod_hi ? cpp::countl_zero(prod_hi)
- : 64 + cpp::countl_zero(static_cast<uint64_t>(prod_mant));
+ int lead_zeros = cpp::countl_zero(prod_mant);
// Move the leading 1 to the most significant bit.
prod_mant <<= lead_zeros;
- // The lower 64 bits are always sticky bits after moving the leading 1 to
- // the most significant bit.
- sticky_bits |= (static_cast<uint64_t>(prod_mant) != 0);
- result = static_cast<uint64_t>(prod_mant >> 64);
- // Change prod_lsb_exp the be the exponent of the least significant bit of
- // the result.
- prod_lsb_exp += 64 - lead_zeros;
- r_exp = prod_lsb_exp + 63;
+ prod_lsb_exp -= lead_zeros;
+ r_exp = prod_lsb_exp + (cpp::numeric_limits<TmpResultType>::digits - 1) -
+ InFPBits::EXP_BIAS + OutFPBits::EXP_BIAS;
if (r_exp > 0) {
// The result is normal. We will shift the mantissa to the right by
// 63 - 52 = 11 bits (from the locations of the most significant bit).
// Then the rounding bit will correspond the 11th bit, and the lowest
// 10 bits are merged into sticky bits.
- round_bit = (result & 0x0400ULL) != 0;
- sticky_bits |= (result & 0x03ffULL) != 0;
- result >>= 11;
+ round_bit =
+ (prod_mant & (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1))) != 0;
+ sticky_bits |= (prod_mant & EXTRA_FRACTION_STICKY_MASK) != 0;
+ result = static_cast<OutStorageType>(prod_mant >> EXTRA_FRACTION_LEN);
} else {
- if (r_exp < -52) {
+ if (r_exp < -OutFPBits::FRACTION_LEN) {
// The result is smaller than 1/2 of the smallest denormal number.
sticky_bits = true; // since the result is non-zero.
result = 0;
} else {
// The result is denormal.
- uint64_t mask = 1ULL << (11 - r_exp);
- round_bit = (result & mask) != 0;
- sticky_bits |= (result & (mask - 1)) != 0;
- if (r_exp > -52)
- result >>= 12 - r_exp;
+ TmpResultType mask = TmpResultType(1) << (EXTRA_FRACTION_LEN - r_exp);
+ round_bit = (prod_mant & mask) != 0;
+ sticky_bits |= (prod_mant & (mask - 1)) != 0;
+ if (r_exp > -OutFPBits::FRACTION_LEN)
+ result = static_cast<OutStorageType>(
+ prod_mant >> (EXTRA_FRACTION_LEN + 1 - r_exp));
else
result = 0;
}
@@ -251,20 +277,21 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
// Finalize the result.
int round_mode = fputil::quick_get_round();
- if (LIBC_UNLIKELY(r_exp >= FPBits::MAX_BIASED_EXPONENT)) {
+ if (LIBC_UNLIKELY(r_exp >= OutFPBits::MAX_BIASED_EXPONENT)) {
if ((round_mode == FE_TOWARDZERO) ||
(round_mode == FE_UPWARD && prod_sign.is_neg()) ||
(round_mode == FE_DOWNWARD && prod_sign.is_pos())) {
- return FPBits::max_normal(prod_sign).get_val();
+ return OutFPBits::max_normal(prod_sign).get_val();
}
- return FPBits::inf(prod_sign).get_val();
+ return OutFPBits::inf(prod_sign).get_val();
}
// Remove hidden bit and append the exponent field and sign bit.
- result = (result & FPBits::FRACTION_MASK) |
- (static_cast<uint64_t>(r_exp) << FPBits::FRACTION_LEN);
+ result = static_cast<OutStorageType>(
+ (result & OutFPBits::FRACTION_MASK) |
+ (static_cast<OutStorageType>(r_exp) << OutFPBits::FRACTION_LEN));
if (prod_sign.is_neg()) {
- result |= FPBits::SIGN_MASK;
+ result |= OutFPBits::SIGN_MASK;
}
// Rounding.
@@ -277,7 +304,7 @@ template <> LIBC_INLINE double fma<double>(double x, double y, double z) {
++result;
}
- return cpp::bit_cast<double>(result);
+ return cpp::bit_cast<OutType>(result);
}
} // namespace generic
diff --git a/libc/src/__support/FPUtil/multiply_add.h b/libc/src/__support/FPUtil/multiply_add.h
index 82932da5417c8..622914e4265c9 100644
--- a/libc/src/__support/FPUtil/multiply_add.h
+++ b/libc/src/__support/FPUtil/multiply_add.h
@@ -45,11 +45,11 @@ namespace LIBC_NAMESPACE {
namespace fputil {
LIBC_INLINE float multiply_add(float x, float y, float z) {
- return fma(x, y, z);
+ return fma<float>(x, y, z);
}
LIBC_INLINE double multiply_add(double x, double y, double z) {
- return fma(x, y, z);
+ return fma<double>(x, y, z);
}
} // namespace fputil
diff --git a/libc/src/__support/big_int.h b/libc/src/__support/big_int.h
index 40ad6eeed7ac2..27cb3b783470c 100644
--- a/libc/src/__support/big_int.h
+++ b/libc/src/__support/big_int.h
@@ -983,6 +983,12 @@ using UInt = BigInt<Bits, false, internal::WordTypeSelectorT<Bits>>;
template <size_t Bits>
using Int = BigInt<Bits, true, internal::WordTypeSelectorT<Bits>>;
+// Provides limits of BigInt.
+template <size_t Bits, bool Signed, typename T>
+struct cpp::numeric_limits<BigInt<Bits, Signed, T>> {
+ LIBC_INLINE_VAR static constexpr int digits = Bits - Signed;
+};
+
// Provides limits of U/Int<128>.
template <> class cpp::numeric_limits<UInt<128>> {
public:
@@ -1073,6 +1079,16 @@ template <typename T>
using make_integral_or_big_int_signed_t =
typename make_integral_or_big_int_signed<T>::type;
+// is_unsigned_integral_or_big_int
+template <typename T>
+struct is_unsigned_integral_or_big_int
+ : cpp::bool_constant<
+ cpp::is_same_v<T, make_integral_or_big_int_unsigned_t<T>>> {};
+
+template <typename T>
+LIBC_INLINE_VAR constexpr bool is_unsigned_integral_or_big_int_v =
+ is_unsigned_integral_or_big_int<T>::value;
+
namespace cpp {
// Specialization of cpp::bit_cast ('bit.h') from T to BigInt.
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index df8e6c0b253da..4472367d6c073 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -99,6 +99,8 @@ add_math_entrypoint_object(exp10f)
add_math_entrypoint_object(expm1)
add_math_entrypoint_object(expm1f)
+add_math_entrypoint_object(f16fmaf)
+
add_math_entrypoint_object(f16sqrtf)
add_math_entrypoint_object(fabs)
diff --git a/libc/src/math/f16fmaf.h b/libc/src/math/f16fmaf.h
new file mode 100644
index 0000000000000..d92cb43c292eb
--- /dev/null
+++ b/libc/src/math/f16fmaf.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for f16fmaf -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_F16FMAF_H
+#define LLVM_LIBC_SRC_MATH_F16FMAF_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 f16fmaf(float x, float y, float z);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_F16FMAF_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index f1f7d6c367be2..706bfc4b08670 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3602,6 +3602,18 @@ add_entrypoint_object(
-O3
)
+add_entrypoint_object(
+ f16fmaf
+ SRCS
+ f16fmaf.cpp
+ HDRS
+ ../f16fmaf.h
+ DEPENDS
+ libc.src.__support.FPUtil.fma
+ COMPILE_OPTIONS
+ -O3
+)
+
add_entrypoint_object(
f16sqrtf
SRCS
diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp
index 037e60021b296..6b9f07476a650 100644
--- a/libc/src/math/generic/expm1f.cpp
+++ b/libc/src/math/generic/expm1f.cpp
@@ -104,7 +104,7 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
// intermediate results as it is more efficient than using an emulated
// version of FMA.
#if defined(LIBC_TARGET_CPU_HAS_FMA)
- return fputil::fma(x, x, x);
+ return fputil::fma<float>(x, x, x);
#else
double xd = x;
return static_cast<float>(fputil::multiply_add(xd, xd, xd));
diff --git a/libc/src/math/generic/f16fmaf.cpp b/libc/src/math/generic/f16fmaf.cpp
new file mode 100644
index 0000000000000..09f2712639335
--- /dev/null
+++ b/libc/src/math/generic/f16fmaf.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of f16fmaf function --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/f16fmaf.h"
+#include "src/__support/FPUtil/FMA.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, f16fmaf, (float x, float y, float z)) {
+ return fputil::fma<float16>(x, y, z);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fma.cpp b/libc/src/math/generic/fma.cpp
index e27e5baeddf58..7937766dccd71 100644
--- a/libc/src/math/generic/fma.cpp
+++ b/libc/src/math/generic/fma.cpp
@@ -14,7 +14,7 @@
namespace LIBC_NAMESPACE {
LLVM_LIBC_FUNCTION(double, fma, (double x, double y, double z)) {
- return fputil::fma(x, y, z);
+ return fputil::fma<double>(x, y, z);
}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/fmaf.cpp b/libc/src/math/generic/fmaf.cpp
index 7512b82005d0f..d367a069ea7d8 100644
--- a/libc/src/math/generic/fmaf.cpp
+++ b/libc/src/math/generic/fmaf.cpp
@@ -14,7 +14,7 @@
namespace LIBC_NAMESPACE {
LLVM_LIBC_FUNCTION(float, fmaf, (float x, float y, float z)) {
- return fputil::fma(x, y, z);
+ return fputil::fma<float>(x, y, z);
}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/range_reduction_fma.h b/libc/src/math/generic/range_reduction_fma.h
index aee8cbb1332a6..82b4ae1c705e1 100644
--- a/libc/src/math/generic/range_reduction_fma.h
+++ b/libc/src/math/generic/range_reduction_fma.h
@@ -33,8 +33,8 @@ static constexpr double THIRTYTWO_OVER_PI[5] = {
// k = round(x * 32 / pi) and y = (x * 32 / pi) - k.
LIBC_INLINE int64_t small_range_reduction(double x, double &y) {
double kd = fputil::nearest_integer(x * THIRTYTWO_OVER_PI[0]);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[0], -kd);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[1], y);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[0], -kd);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], y);
return static_cast<int64_t>(kd);
}
@@ -54,12 +54,13 @@ LIBC_INLINE int64_t large_range_reduction(double x, int x_exp, double &y) {
prod_hi.set_uintval(prod_hi.uintval() &
((x_exp < 55) ? (~0xfffULL) : (~0ULL))); // |x| < 2^55
double k_hi = fputil::nearest_integer(prod_hi.get_val());
- double truncated_prod = fputil::fma(x, THIRTYTWO_OVER_PI[0], -k_hi);
- double prod_lo = fputil::fma(x, THIRTYTWO_OVER_PI[1], truncated_prod);
+ double truncated_prod = fputil::fma<double>(x, THIRTYTWO_OVER_PI[0], -k_hi);
+ double prod_lo =
+ fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], truncated_prod);
double k_lo = fputil::nearest_integer(prod_lo);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[1], truncated_prod - k_lo);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[2], y);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[3], y);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], truncated_prod - k_lo);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[2], y);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[3], y);
return static_cast<int64_t>(k_lo);
}
@@ -74,12 +75,12 @@ LIBC_INLINE int64_t large_range_reduction(double x, int x_exp, double &y) {
prod_hi.set_uintval(prod_hi.uintval() &
((x_exp < 110) ? (~0xfffULL) : (~0ULL))); // |x| < 2^110
double k_hi = fputil::nearest_integer(prod_hi.get_val());
- double truncated_prod = fputil::fma(x, THIRTYTWO_OVER_PI[1], -k_hi);
- double prod_lo = fputil::fma(x, THIRTYTWO_OVER_PI[2], truncated_prod);
+ double truncated_prod = fputil::fma<double>(x, THIRTYTWO_OVER_PI[1], -k_hi);
+ double prod_lo = fputil::fma<double>(x, THIRTYTWO_OVER_PI[2], truncated_prod);
double k_lo = fputil::nearest_integer(prod_lo);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[2], truncated_prod - k_lo);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[3], y);
- y = fputil::fma(x, THIRTYTWO_OVER_PI[4], y);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[2], truncated_prod - k_lo);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[3], y);
+ y = fputil::fma<double>(x, THIRTYTWO_OVER_PI[4], y);
return static_cast<int64_t>(k_lo);
}
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 79e6e89a5324e..bb364c3f0a175 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1455,11 +1455,12 @@ add_fp_unittest(
libc-math-unittests
SRCS
fmaf_test.cpp
+ HDRS
+ FmaTest.h
DEPENDS
libc.src.math.fmaf
libc.src.stdlib.rand
libc.src.stdlib.srand
- libc.src.__support.FPUtil.fp_bits
FLAGS
FMA_OPT__ONLY
)
@@ -1471,11 +1472,12 @@ add_fp_unittest(
libc-math-unittests
SRCS
fma_test.cpp
+ HDRS
+ FmaTest.h
DEPENDS
libc.src.math.fma
libc.src.stdlib.rand
libc.src.stdlib.srand
- libc.src.__support.FPUtil.fp_bits
)
add_fp_unittest(
@@ -1888,6 +1890,21 @@ add_fp_unittest(
libc.src.__support.FPUtil.fp_bits
)
+add_fp_unittest(
+ f16fmaf_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ f16fmaf_test.cpp
+ HDRS
+ FmaTest.h
+ DEPENDS
+ libc.src.math.f16fmaf
+ libc.src.stdlib.rand
+ libc.src.stdlib.srand
+)
+
add_subdirectory(generic)
add_subdirectory(smoke)
diff --git a/libc/test/src/math/FmaTest.h b/libc/test/src/math/FmaTest.h
index 5a40f694ebd10..0e0f05aad5d13 100644
--- a/libc/test/src/math/FmaTest.h
+++ b/libc/test/src/math/FmaTest.h
@@ -9,7 +9,6 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_FMATEST_H
#define LLVM_LIBC_TEST_SRC_MATH_FMATEST_H
-#include "src/__support/FPUtil/FPBits.h"
#include "src/stdlib/rand.h"
#include "src/stdlib/srand.h"
#include "test/UnitTest/FEnvSafeTest.h"
@@ -19,85 +18,115 @@
namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
-template <typename T>
+template <typename OutType, typename InType = OutType>
class FmaTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest {
-private:
- using Func = T (*)(T, T, T);
- using FPBits = LIBC_NAMESPACE::fputil::FPBits<T>;
- using StorageType = typename FPBits::StorageType;
-
- const T min_subnormal = FPBits::min_subnormal(Sign::POS).get_val();
- const T min_normal = FPBits::min_normal(Sign::POS).get_val();
- const T max_normal = FPBits::max_normal(Sign::POS).get_val();
- const T inf = FPBits::inf(Sign::POS).get_val();
- const T neg_inf = FPBits::inf(Sign::NEG).get_val();
- const T zero = FPBits::zero(Sign::POS).get_val();
- const T neg_zero = FPBits::zero(Sign::NEG).get_val();
- const T nan = FPBits::quiet_nan().get_val();
-
- static constexpr StorageType MAX_NORMAL = FPBits::max_normal().uintval();
- static constexpr StorageType MIN_NORMAL = FPBits::min_normal().uintval();
- static constexpr StorageType MAX_SUBNORMAL =
- FPBits::max_subnormal().uintval();
- static constexpr StorageType MIN_SUBNORMAL =
- FPBits::min_subnormal().uintval();
-
- StorageType get_random_bit_pattern() {
- StorageType bits{0};
- for (StorageType i = 0; i < sizeof(StorageType) / 2; ++i) {
+
+ struct OutConstants {
+ DECLARE_SPECIAL_CONSTANTS(OutType)
+ };
+
+ struct InConstants {
+ DECLARE_SPECIAL_CONSTANTS(InType)
+ };
+
+ using OutFPBits = typename OutConstants::FPBits;
+ using OutStorageType = typename OutConstants::StorageType;
+ using InFPBits = typename InConstants::FPBits;
+ using InStorageType = typename InConstants::StorageType;
+
+ static constexpr OutStorageType OUT_MIN_NORMAL_U =
+ OutFPBits::min_normal().uintval();
+ static constexpr InStorageType IN_MAX_NORMAL_U =
+ InFPBits::max_normal().uintval();
+ static constexpr InStorageType IN_MIN_NORMAL_U =
+ InFPBits::min_normal().uintval();
+ static constexpr InStorageType IN_MAX_SUBNORMAL_U =
+ InFPBits::max_subnormal().uintval();
+ static constexpr InStorageType IN_MIN_SUBNORMAL_U =
+ InFPBits::min_subnormal().uintval();
+
+ OutConstants out;
+ InConstants in;
+
+ InStorageType get_random_bit_pattern() {
+ InStorageType bits{0};
+ for (InStorageType i = 0; i < sizeof(InStorageType) / 2; ++i) {
bits = (bits << 2) + static_cast<uint16_t>(LIBC_NAMESPACE::rand());
}
return bits;
}
public:
- void test_special_numbers(Func func) {
- EXPECT_FP_EQ(func(zero, zero, zero), zero);
- EXPECT_FP_EQ(func(zero, neg_zero, neg_zero), neg_zero);
- EXPECT_FP_EQ(func(inf, inf, zero), inf);
- EXPECT_FP_EQ(func(neg_inf, inf, neg_inf), neg_inf);
- EXPECT_FP_EQ(func(inf, zero, zero), nan);
- EXPECT_FP_EQ(func(inf, neg_inf, inf), nan);
- EXPECT_FP_EQ(func(nan, zero, inf), nan);
- EXPECT_FP_EQ(func(inf, neg_inf, nan), nan);
+ using FmaFunc = OutType (*)(InType, InType, InType);
+
+ void test_special_numbers(FmaFunc func) {
+ EXPECT_FP_EQ(out.zero, func(in.zero, in.zero, in.zero));
+ EXPECT_FP_EQ(out.neg_zero, func(in.zero, in.neg_zero, in.neg_zero));
+ EXPECT_FP_EQ(out.inf, func(in.inf, in.inf, in.zero));
+ EXPECT_FP_EQ(out.neg_inf, func(in.neg_inf, in.inf, in.neg_inf));
+ EXPECT_FP_EQ(out.aNaN, func(in.inf, in.zero, in.zero));
+ EXPECT_FP_EQ(out.aNaN, func(in.inf, in.neg_inf, in.inf));
+ EXPECT_FP_EQ(out.aNaN, func(in.aNaN, in.zero, in.inf));
+ EXPECT_FP_EQ(out.aNaN, func(in.inf, in.neg_inf, in.aNaN));
// Test underflow rounding up.
- EXPECT_FP_EQ(func(T(0.5), min_subnormal, min_subnormal),
- FPBits(StorageType(2)).get_val());
+ EXPECT_FP_EQ(OutFPBits(OutStorageType(2)).get_val(),
+ func(OutType(0.5), out.min_denormal, out.min_denormal));
+
+ if constexpr (sizeof(OutType) < sizeof(InType)) {
+ EXPECT_FP_EQ(out.zero,
+ func(InType(0.5), in.min_denormal, in.min_denormal));
+ }
// Test underflow rounding down.
- T v = FPBits(MIN_NORMAL + StorageType(1)).get_val();
- EXPECT_FP_EQ(func(T(1) / T(MIN_NORMAL << 1), v, min_normal), v);
+ OutType v = OutFPBits(static_cast<OutStorageType>(OUT_MIN_NORMAL_U +
+ OutStorageType(1)))
+ .get_val();
+ EXPECT_FP_EQ(v, func(OutType(1) / OutType(OUT_MIN_NORMAL_U << 1), v,
+ out.min_normal));
+
+ if constexpr (sizeof(OutType) < sizeof(InType)) {
+ InType v = InFPBits(static_cast<InStorageType>(IN_MIN_NORMAL_U +
+ InStorageType(1)))
+ .get_val();
+ EXPECT_FP_EQ(
+ out.min_normal,
+ func(InType(1) / InType(IN_MIN_NORMAL_U << 1), v, out.min_normal));
+ }
// Test overflow.
- T z = max_normal;
- EXPECT_FP_EQ(func(T(1.75), z, -z), T(0.75) * z);
+ OutType z = out.max_normal;
+ EXPECT_FP_EQ(OutType(0.75) * z, func(InType(1.75), z, -z));
// Exact cancellation.
- EXPECT_FP_EQ(func(T(3.0), T(5.0), -T(15.0)), T(0.0));
- EXPECT_FP_EQ(func(T(-3.0), T(5.0), T(15.0)), T(0.0));
+ EXPECT_FP_EQ(out.zero, func(InType(3.0), InType(5.0), -InType(15.0)));
+ EXPECT_FP_EQ(out.zero, func(InType(-3.0), InType(5.0), InType(15.0)));
}
- void test_subnormal_range(Func func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = (MAX_SUBNORMAL - MIN_SUBNORMAL) / COUNT;
+ void test_subnormal_range(FmaFunc func) {
+ constexpr InStorageType COUNT = 100'001;
+ constexpr InStorageType STEP =
+ (IN_MAX_SUBNORMAL_U - IN_MIN_SUBNORMAL_U) / COUNT;
LIBC_NAMESPACE::srand(1);
- for (StorageType v = MIN_SUBNORMAL, w = MAX_SUBNORMAL;
- v <= MAX_SUBNORMAL && w >= MIN_SUBNORMAL; v += STEP, w -= STEP) {
- T x = FPBits(get_random_bit_pattern()).get_val(), y = FPBits(v).get_val(),
- z = FPBits(w).get_val();
- mpfr::TernaryInput<T> input{x, y, z};
+ for (InStorageType v = IN_MIN_SUBNORMAL_U, w = IN_MAX_SUBNORMAL_U;
+ v <= IN_MAX_SUBNORMAL_U && w >= IN_MIN_SUBNORMAL_U;
+ v += STEP, w -= STEP) {
+ InType x = InFPBits(get_random_bit_pattern()).get_val();
+ InType y = InFPBits(v).get_val();
+ InType z = InFPBits(w).get_val();
+ mpfr::TernaryInput<InType> input{x, y, z};
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Fma, input, func(x, y, z),
0.5);
}
}
- void test_normal_range(Func func) {
- constexpr StorageType COUNT = 100'001;
- constexpr StorageType STEP = (MAX_NORMAL - MIN_NORMAL) / COUNT;
+ void test_normal_range(FmaFunc func) {
+ constexpr InStorageType COUNT = 100'001;
+ constexpr InStorageType STEP = (IN_MAX_NORMAL_U - IN_MIN_NORMAL_U) / COUNT;
LIBC_NAMESPACE::srand(1);
- for (StorageType v = MIN_NORMAL, w = MAX_NORMAL;
- v <= MAX_NORMAL && w >= MIN_NORMAL; v += STEP, w -= STEP) {
- T x = FPBits(v).get_val(), y = FPBits(w).get_val(),
- z = FPBits(get_random_bit_pattern()).get_val();
- mpfr::TernaryInput<T> input{x, y, z};
+ for (InStorageType v = IN_MIN_NORMAL_U, w = IN_MAX_NORMAL_U;
+ v <= IN_MAX_NORMAL_U && w >= IN_MIN_NORMAL_U; v += STEP, w -= STEP) {
+ InType x = InFPBits(v).get_val();
+ InType y = InFPBits(w).get_val();
+ InType z = InFPBits(get_random_bit_pattern()).get_val();
+ mpfr::TernaryInput<InType> input{x, y, z};
ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Fma, input, func(x, y, z),
0.5);
}
diff --git a/libc/test/src/math/f16fmaf_test.cpp b/libc/test/src/math/f16fmaf_test.cpp
new file mode 100644
index 0000000000000..dc197e297c11e
--- /dev/null
+++ b/libc/test/src/math/f16fmaf_test.cpp
@@ -0,0 +1,25 @@
+//===-- Unittests for f16fmaf ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FmaTest.h"
+
+#include "src/math/f16fmaf.h"
+
+using LlvmLibcF16fmafTest = FmaTestTemplate<float16, float>;
+
+TEST_F(LlvmLibcF16fmafTest, SpecialNumbers) {
+ test_special_numbers(&LIBC_NAMESPACE::f16fmaf);
+}
+
+TEST_F(LlvmLibcF16fmafTest, SubnormalRange) {
+ test_subnormal_range(&LIBC_NAMESPACE::f16fmaf);
+}
+
+TEST_F(LlvmLibcF16fmafTest, NormalRange) {
+ test_normal_range(&LIBC_NAMESPACE::f16fmaf);
+}
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 3e9edc51b004f..a67d0437592d5 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -3109,9 +3109,10 @@ add_fp_unittest(
libc-math-smoke-tests
SRCS
fmaf_test.cpp
+ HDRS
+ FmaTest.h
DEPENDS
libc.src.math.fmaf
- libc.src.__support.FPUtil.fp_bits
FLAGS
FMA_OPT__ONLY
)
@@ -3122,9 +3123,10 @@ add_fp_unittest(
libc-math-smoke-tests
SRCS
fma_test.cpp
+ HDRS
+ FmaTest.h
DEPENDS
libc.src.math.fma
- libc.src.__support.FPUtil.fp_bits
)
add_fp_unittest(
@@ -3551,6 +3553,18 @@ add_fp_unittest(
libc.src.math.totalordermagf16
)
+add_fp_unittest(
+ f16fmaf_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ f16fmaf_test.cpp
+ HDRS
+ FmaTest.h
+ DEPENDS
+ libc.src.math.f16fmaf
+)
+
add_fp_unittest(
f16sqrtf_test
SUITE
diff --git a/libc/test/src/math/smoke/FmaTest.h b/libc/test/src/math/smoke/FmaTest.h
index 7063ecf199837..3edd3aceb4847 100644
--- a/libc/test/src/math/smoke/FmaTest.h
+++ b/libc/test/src/math/smoke/FmaTest.h
@@ -9,51 +9,85 @@
#ifndef LLVM_LIBC_TEST_SRC_MATH_FMATEST_H
#define LLVM_LIBC_TEST_SRC_MATH_FMATEST_H
-#include "src/__support/FPUtil/FPBits.h"
#include "test/UnitTest/FEnvSafeTest.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
-template <typename T>
+template <typename OutType, typename InType = OutType>
class FmaTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest {
-private:
- using Func = T (*)(T, T, T);
- using FPBits = LIBC_NAMESPACE::fputil::FPBits<T>;
- using StorageType = typename FPBits::StorageType;
- const T inf = FPBits::inf(Sign::POS).get_val();
- const T neg_inf = FPBits::inf(Sign::NEG).get_val();
- const T zero = FPBits::zero(Sign::POS).get_val();
- const T neg_zero = FPBits::zero(Sign::NEG).get_val();
- const T nan = FPBits::quiet_nan().get_val();
+ struct OutConstants {
+ DECLARE_SPECIAL_CONSTANTS(OutType)
+ };
+
+ struct InConstants {
+ DECLARE_SPECIAL_CONSTANTS(InType)
+ };
+
+ using OutFPBits = typename OutConstants::FPBits;
+ using OutStorageType = typename OutConstants::StorageType;
+ using InFPBits = typename InConstants::FPBits;
+ using InStorageType = typename InConstants::StorageType;
+
+ static constexpr OutStorageType OUT_MIN_NORMAL_U =
+ OutFPBits::min_normal().uintval();
+ static constexpr InStorageType IN_MIN_NORMAL_U =
+ InFPBits::min_normal().uintval();
+
+ OutConstants out;
+ InConstants in;
public:
- void test_special_numbers(Func func) {
- EXPECT_FP_EQ(func(zero, zero, zero), zero);
- EXPECT_FP_EQ(func(zero, neg_zero, neg_zero), neg_zero);
- EXPECT_FP_EQ(func(inf, inf, zero), inf);
- EXPECT_FP_EQ(func(neg_inf, inf, neg_inf), neg_inf);
- EXPECT_FP_EQ(func(inf, zero, zero), nan);
- EXPECT_FP_EQ(func(inf, neg_inf, inf), nan);
- EXPECT_FP_EQ(func(nan, zero, inf), nan);
- EXPECT_FP_EQ(func(inf, neg_inf, nan), nan);
+ using FmaFunc = OutType (*)(InType, InType, InType);
+
+ void test_special_numbers(FmaFunc func) {
+ EXPECT_FP_EQ(out.zero, func(in.zero, in.zero, in.zero));
+ EXPECT_FP_EQ(out.neg_zero, func(in.zero, in.neg_zero, in.neg_zero));
+ EXPECT_FP_EQ(out.inf, func(in.inf, in.inf, in.zero));
+ EXPECT_FP_EQ(out.neg_inf, func(in.neg_inf, in.inf, in.neg_inf));
+ EXPECT_FP_EQ(out.aNaN, func(in.inf, in.zero, in.zero));
+ EXPECT_FP_EQ(out.aNaN, func(in.inf, in.neg_inf, in.inf));
+ EXPECT_FP_EQ(out.aNaN, func(in.aNaN, in.zero, in.inf));
+ EXPECT_FP_EQ(out.aNaN, func(in.inf, in.neg_inf, in.aNaN));
// Test underflow rounding up.
- EXPECT_FP_EQ(func(T(0.5), FPBits::min_subnormal().get_val(),
- FPBits::min_subnormal().get_val()),
- FPBits(StorageType(2)).get_val());
+ EXPECT_FP_EQ(OutFPBits(OutStorageType(2)).get_val(),
+ func(OutType(0.5), out.min_denormal, out.min_denormal));
+
+ if constexpr (sizeof(OutType) < sizeof(InType)) {
+ EXPECT_FP_EQ(out.zero,
+ func(InType(0.5), in.min_denormal, in.min_denormal));
+ }
// Test underflow rounding down.
- StorageType MIN_NORMAL = FPBits::min_normal().uintval();
- T v = FPBits(MIN_NORMAL + StorageType(1)).get_val();
- EXPECT_FP_EQ(
- func(T(1) / T(MIN_NORMAL << 1), v, FPBits::min_normal().get_val()), v);
+ OutType v = OutFPBits(static_cast<OutStorageType>(OUT_MIN_NORMAL_U +
+ OutStorageType(1)))
+ .get_val();
+ EXPECT_FP_EQ(v, func(OutType(1) / OutType(OUT_MIN_NORMAL_U << 1), v,
+ out.min_normal));
+
+ if constexpr (sizeof(OutType) < sizeof(InType)) {
+ InType v = InFPBits(static_cast<InStorageType>(IN_MIN_NORMAL_U +
+ InStorageType(1)))
+ .get_val();
+ EXPECT_FP_EQ(
+ out.min_normal,
+ func(InType(1) / InType(IN_MIN_NORMAL_U << 1), v, out.min_normal));
+ }
// Test overflow.
- T z = FPBits::max_normal().get_val();
- EXPECT_FP_EQ(func(T(1.75), z, -z), T(0.75) * z);
+ OutType z = out.max_normal;
+ EXPECT_FP_EQ(OutType(0.75) * z, func(InType(1.75), z, -z));
// Exact cancellation.
- EXPECT_FP_EQ(func(T(3.0), T(5.0), -T(15.0)), T(0.0));
- EXPECT_FP_EQ(func(T(-3.0), T(5.0), T(15.0)), T(0.0));
+ EXPECT_FP_EQ(out.zero, func(InType(3.0), InType(5.0), -InType(15.0)));
+ EXPECT_FP_EQ(out.zero, func(InType(-3.0), InType(5.0), InType(15.0)));
}
};
+#define LIST_FMA_TESTS(T, func) \
+ using LlvmLibcFmaTest = FmaTestTemplate<T>; \
+ TEST_F(LlvmLibcFmaTest, SpecialNumbers) { test_special_numbers(&func); }
+
+#define LIST_NARROWING_FMA_TESTS(OutType, InType, func) \
+ using LlvmLibcFmaTest = FmaTestTemplate<OutType, InType>; \
+ TEST_F(LlvmLibcFmaTest, SpecialNumbers) { test_special_numbers(&func); }
+
#endif // LLVM_LIBC_TEST_SRC_MATH_FMATEST_H
diff --git a/libc/test/src/math/smoke/f16fmaf_test.cpp b/libc/test/src/math/smoke/f16fmaf_test.cpp
new file mode 100644
index 0000000000000..5e3aec768c191
--- /dev/null
+++ b/libc/test/src/math/smoke/f16fmaf_test.cpp
@@ -0,0 +1,13 @@
+//===-- Unittests for f16fmaf ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FmaTest.h"
+
+#include "src/math/f16fmaf.h"
+
+LIST_NARROWING_FMA_TESTS(float16, float, LIBC_NAMESPACE::f16fmaf)
diff --git a/libc/test/src/math/smoke/fma_test.cpp b/libc/test/src/math/smoke/fma_test.cpp
index 4460b80d9ad65..c5d802a532eb0 100644
--- a/libc/test/src/math/smoke/fma_test.cpp
+++ b/libc/test/src/math/smoke/fma_test.cpp
@@ -10,8 +10,4 @@
#include "src/math/fma.h"
-using LlvmLibcFmaTest = FmaTestTemplate<double>;
-
-TEST_F(LlvmLibcFmaTest, SpecialNumbers) {
- test_special_numbers(&LIBC_NAMESPACE::fma);
-}
+LIST_FMA_TESTS(double, LIBC_NAMESPACE::fma)
diff --git a/libc/test/src/math/smoke/fmaf_test.cpp b/libc/test/src/math/smoke/fmaf_test.cpp
index a645efb8776d0..09e9c504b942a 100644
--- a/libc/test/src/math/smoke/fmaf_test.cpp
+++ b/libc/test/src/math/smoke/fmaf_test.cpp
@@ -10,8 +10,4 @@
#include "src/math/fmaf.h"
-using LlvmLibcFmafTest = FmaTestTemplate<float>;
-
-TEST_F(LlvmLibcFmafTest, SpecialNumbers) {
- test_special_numbers(&LIBC_NAMESPACE::fmaf);
-}
+LIST_FMA_TESTS(float, LIBC_NAMESPACE::fmaf)
diff --git a/libc/test/src/sched/CMakeLists.txt b/libc/test/src/sched/CMakeLists.txt
index 9dda4ea16e101..3dd56c72fc86d 100644
--- a/libc/test/src/sched/CMakeLists.txt
+++ b/libc/test/src/sched/CMakeLists.txt
@@ -40,23 +40,23 @@ add_libc_unittest(
libc.src.sched.sched_get_priority_max
)
-add_libc_unittest(
- scheduler_test
- SUITE
- libc_sched_unittests
- SRCS
- param_and_scheduler_test.cpp
- DEPENDS
- libc.include.sched
- libc.src.errno.errno
- libc.src.sched.sched_getscheduler
- libc.src.sched.sched_setscheduler
- libc.src.sched.sched_getparam
- libc.src.sched.sched_setparam
- libc.src.sched.sched_get_priority_min
- libc.src.sched.sched_get_priority_max
- libc.src.unistd.getuid
-)
+# add_libc_unittest(
+# scheduler_test
+# SUITE
+# libc_sched_unittests
+# SRCS
+# param_and_scheduler_test.cpp
+# DEPENDS
+# libc.include.sched
+# libc.src.errno.errno
+# libc.src.sched.sched_getscheduler
+# libc.src.sched.sched_setscheduler
+# libc.src.sched.sched_getparam
+# libc.src.sched.sched_setparam
+# libc.src.sched.sched_get_priority_min
+# libc.src.sched.sched_get_priority_max
+# libc.src.unistd.getuid
+# )
add_libc_unittest(
sched_rr_get_interval_test
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index 100c6b1644b16..2eac4dd8e199d 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -922,46 +922,49 @@ template void explain_binary_operation_one_output_error<long double>(
Operation, const BinaryInput<long double> &, long double, double,
RoundingMode);
-template <typename T>
-void explain_ternary_operation_one_output_error(Operation op,
- const TernaryInput<T> &input,
- T libc_result,
- double ulp_tolerance,
- RoundingMode rounding) {
- unsigned int precision = get_precision<T>(ulp_tolerance);
+template <typename InputType, typename OutputType>
+void explain_ternary_operation_one_output_error(
+ Operation op, const TernaryInput<InputType> &input, OutputType libc_result,
+ double ulp_tolerance, RoundingMode rounding) {
+ unsigned int precision = get_precision<InputType>(ulp_tolerance);
MPFRNumber mpfrX(input.x, precision);
MPFRNumber mpfrY(input.y, precision);
MPFRNumber mpfrZ(input.z, precision);
- FPBits<T> xbits(input.x);
- FPBits<T> ybits(input.y);
- FPBits<T> zbits(input.z);
+ FPBits<InputType> xbits(input.x);
+ FPBits<InputType> ybits(input.y);
+ FPBits<InputType> zbits(input.z);
MPFRNumber mpfr_result = ternary_operation_one_output(
op, input.x, input.y, input.z, precision, rounding);
MPFRNumber mpfrMatchValue(libc_result);
tlog << "Input decimal: x: " << mpfrX.str() << " y: " << mpfrY.str()
<< " z: " << mpfrZ.str() << '\n';
- tlog << " First input bits: " << str(FPBits<T>(input.x)) << '\n';
- tlog << "Second input bits: " << str(FPBits<T>(input.y)) << '\n';
- tlog << " Third input bits: " << str(FPBits<T>(input.z)) << '\n';
+ tlog << " First input bits: " << str(FPBits<InputType>(input.x)) << '\n';
+ tlog << "Second input bits: " << str(FPBits<InputType>(input.y)) << '\n';
+ tlog << " Third input bits: " << str(FPBits<InputType>(input.z)) << '\n';
tlog << "Libc result: " << mpfrMatchValue.str() << '\n'
<< "MPFR result: " << mpfr_result.str() << '\n';
- tlog << "Libc floating point result bits: " << str(FPBits<T>(libc_result))
- << '\n';
+ tlog << "Libc floating point result bits: "
+ << str(FPBits<OutputType>(libc_result)) << '\n';
tlog << " MPFR rounded bits: "
- << str(FPBits<T>(mpfr_result.as<T>())) << '\n';
+ << str(FPBits<OutputType>(mpfr_result.as<OutputType>())) << '\n';
tlog << "ULP error: " << mpfr_result.ulp_as_mpfr_number(libc_result).str()
<< '\n';
}
-template void explain_ternary_operation_one_output_error<float>(
+template void explain_ternary_operation_one_output_error(
Operation, const TernaryInput<float> &, float, double, RoundingMode);
-template void explain_ternary_operation_one_output_error<double>(
+template void explain_ternary_operation_one_output_error(
Operation, const TernaryInput<double> &, double, double, RoundingMode);
-template void explain_ternary_operation_one_output_error<long double>(
- Operation, const TernaryInput<long double> &, long double, double,
- RoundingMode);
+template void
+explain_ternary_operation_one_output_error(Operation,
+ const TernaryInput<long double> &,
+ long double, double, RoundingMode);
+#ifdef LIBC_TYPES_HAS_FLOAT16
+template void explain_ternary_operation_one_output_error(
+ Operation, const TernaryInput<float> &, float16, double, RoundingMode);
+#endif
template <typename InputType, typename OutputType>
bool compare_unary_operation_single_output(Operation op, InputType input,
@@ -1069,12 +1072,13 @@ template bool compare_binary_operation_one_output<long double>(
Operation, const BinaryInput<long double> &, long double, double,
RoundingMode);
-template <typename T>
+template <typename InputType, typename OutputType>
bool compare_ternary_operation_one_output(Operation op,
- const TernaryInput<T> &input,
- T libc_result, double ulp_tolerance,
+ const TernaryInput<InputType> &input,
+ OutputType libc_result,
+ double ulp_tolerance,
RoundingMode rounding) {
- unsigned int precision = get_precision<T>(ulp_tolerance);
+ unsigned int precision = get_precision<InputType>(ulp_tolerance);
MPFRNumber mpfr_result = ternary_operation_one_output(
op, input.x, input.y, input.z, precision, rounding);
double ulp = mpfr_result.ulp(libc_result);
@@ -1082,13 +1086,23 @@ bool compare_ternary_operation_one_output(Operation op,
return (ulp <= ulp_tolerance);
}
-template bool compare_ternary_operation_one_output<float>(
- Operation, const TernaryInput<float> &, float, double, RoundingMode);
-template bool compare_ternary_operation_one_output<double>(
- Operation, const TernaryInput<double> &, double, double, RoundingMode);
-template bool compare_ternary_operation_one_output<long double>(
- Operation, const TernaryInput<long double> &, long double, double,
- RoundingMode);
+template bool compare_ternary_operation_one_output(Operation,
+ const TernaryInput<float> &,
+ float, double, RoundingMode);
+template bool compare_ternary_operation_one_output(Operation,
+ const TernaryInput<double> &,
+ double, double,
+ RoundingMode);
+template bool
+compare_ternary_operation_one_output(Operation,
+ const TernaryInput<long double> &,
+ long double, double, RoundingMode);
+#ifdef LIBC_TYPES_HAS_FLOAT16
+template bool compare_ternary_operation_one_output(Operation,
+ const TernaryInput<float> &,
+ float16, double,
+ RoundingMode);
+#endif
} // namespace internal
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index 805678b96c2ef..c8c0506164bb2 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -10,6 +10,7 @@
#define LLVM_LIBC_UTILS_MPFRWRAPPER_MPFRUTILS_H
#include "src/__support/CPP/type_traits.h"
+#include "src/__support/CPP/type_traits/type_identity.h"
#include "test/UnitTest/RoundingModeUtils.h"
#include "test/UnitTest/Test.h"
@@ -129,6 +130,18 @@ struct AreMatchingBinaryInputAndBinaryOutput<BinaryInput<T>, BinaryOutput<T>> {
static constexpr bool VALUE = cpp::is_floating_point_v<T>;
};
+template <typename T> struct IsTernaryInput {
+ static constexpr bool VALUE = false;
+};
+
+template <typename T> struct IsTernaryInput<TernaryInput<T>> {
+ static constexpr bool VALUE = true;
+};
+
+template <typename T> struct MakeScalarInput : cpp::type_identity<T> {};
+template <typename T>
+struct MakeScalarInput<TernaryInput<T>> : cpp::type_identity<T> {};
+
template <typename InputType, typename OutputType>
bool compare_unary_operation_single_output(Operation op, InputType input,
OutputType libc_output,
@@ -152,10 +165,11 @@ bool compare_binary_operation_one_output(Operation op,
T libc_output, double ulp_tolerance,
RoundingMode rounding);
-template <typename T>
+template <typename InputType, typename OutputType>
bool compare_ternary_operation_one_output(Operation op,
- const TernaryInput<T> &input,
- T libc_output, double ulp_tolerance,
+ const TernaryInput<InputType> &input,
+ OutputType libc_output,
+ double ulp_tolerance,
RoundingMode rounding);
template <typename InputType, typename OutputType>
@@ -180,12 +194,10 @@ void explain_binary_operation_one_output_error(Operation op,
double ulp_tolerance,
RoundingMode rounding);
-template <typename T>
-void explain_ternary_operation_one_output_error(Operation op,
- const TernaryInput<T> &input,
- T match_value,
- double ulp_tolerance,
- RoundingMode rounding);
+template <typename InputType, typename OutputType>
+void explain_ternary_operation_one_output_error(
+ Operation op, const TernaryInput<InputType> &input, OutputType match_value,
+ double ulp_tolerance, RoundingMode rounding);
template <Operation op, bool silent, typename InputType, typename OutputType>
class MPFRMatcher : public testing::Matcher<OutputType> {
@@ -234,7 +246,8 @@ class MPFRMatcher : public testing::Matcher<OutputType> {
rounding);
}
- template <typename T> bool match(const TernaryInput<T> &in, T out) {
+ template <typename T, typename U>
+ bool match(const TernaryInput<T> &in, U out) {
return compare_ternary_operation_one_output(op, in, out, ulp_tolerance,
rounding);
}
@@ -260,7 +273,8 @@ class MPFRMatcher : public testing::Matcher<OutputType> {
rounding);
}
- template <typename T> void explain_error(const TernaryInput<T> &in, T out) {
+ template <typename T, typename U>
+ void explain_error(const TernaryInput<T> &in, U out) {
explain_ternary_operation_one_output_error(op, in, out, ulp_tolerance,
rounding);
}
@@ -272,10 +286,14 @@ class MPFRMatcher : public testing::Matcher<OutputType> {
// types.
template <Operation op, typename InputType, typename OutputType>
constexpr bool is_valid_operation() {
- constexpr bool IS_NARROWING_OP = op == Operation::Sqrt &&
- cpp::is_floating_point_v<InputType> &&
- cpp::is_floating_point_v<OutputType> &&
- sizeof(OutputType) <= sizeof(InputType);
+ constexpr bool IS_NARROWING_OP =
+ (op == Operation::Sqrt && cpp::is_floating_point_v<InputType> &&
+ cpp::is_floating_point_v<OutputType> &&
+ sizeof(OutputType) <= sizeof(InputType)) ||
+ (op == Operation::Fma && internal::IsTernaryInput<InputType>::VALUE &&
+ cpp::is_floating_point_v<
+ typename internal::MakeScalarInput<InputType>::type> &&
+ cpp::is_floating_point_v<OutputType>);
if (IS_NARROWING_OP)
return true;
return (Operation::BeginUnaryOperationsSingleOutput < op &&
>From 06e598e490065f25788abe3faa33460a0b463c43 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Fri, 14 Jun 2024 01:16:03 +0200
Subject: [PATCH 02/11] [libc] Undo disabling of scheduler_test
---
libc/test/src/sched/CMakeLists.txt | 34 +++++++++++++++---------------
1 file changed, 17 insertions(+), 17 deletions(-)
diff --git a/libc/test/src/sched/CMakeLists.txt b/libc/test/src/sched/CMakeLists.txt
index 3dd56c72fc86d..9dda4ea16e101 100644
--- a/libc/test/src/sched/CMakeLists.txt
+++ b/libc/test/src/sched/CMakeLists.txt
@@ -40,23 +40,23 @@ add_libc_unittest(
libc.src.sched.sched_get_priority_max
)
-# add_libc_unittest(
-# scheduler_test
-# SUITE
-# libc_sched_unittests
-# SRCS
-# param_and_scheduler_test.cpp
-# DEPENDS
-# libc.include.sched
-# libc.src.errno.errno
-# libc.src.sched.sched_getscheduler
-# libc.src.sched.sched_setscheduler
-# libc.src.sched.sched_getparam
-# libc.src.sched.sched_setparam
-# libc.src.sched.sched_get_priority_min
-# libc.src.sched.sched_get_priority_max
-# libc.src.unistd.getuid
-# )
+add_libc_unittest(
+ scheduler_test
+ SUITE
+ libc_sched_unittests
+ SRCS
+ param_and_scheduler_test.cpp
+ DEPENDS
+ libc.include.sched
+ libc.src.errno.errno
+ libc.src.sched.sched_getscheduler
+ libc.src.sched.sched_setscheduler
+ libc.src.sched.sched_getparam
+ libc.src.sched.sched_setparam
+ libc.src.sched.sched_get_priority_min
+ libc.src.sched.sched_get_priority_max
+ libc.src.unistd.getuid
+)
add_libc_unittest(
sched_rr_get_interval_test
>From 05bc63cba9427b74f1392a2864d33fe1f97ca133 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Fri, 14 Jun 2024 01:18:42 +0200
Subject: [PATCH 03/11] [libc] Fix includes and dependencies
---
libc/src/__support/FPUtil/generic/CMakeLists.txt | 3 +++
libc/src/math/generic/CMakeLists.txt | 1 +
libc/utils/MPFRWrapper/MPFRUtils.h | 1 -
3 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/libc/src/__support/FPUtil/generic/CMakeLists.txt b/libc/src/__support/FPUtil/generic/CMakeLists.txt
index 595656e3e8d90..a8a95ba3f15ff 100644
--- a/libc/src/__support/FPUtil/generic/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/generic/CMakeLists.txt
@@ -19,12 +19,15 @@ add_header_library(
HDRS
FMA.h
DEPENDS
+ libc.hdr.fenv_macros
libc.src.__support.common
libc.src.__support.CPP.bit
+ libc.src.__support.CPP.limits
libc.src.__support.CPP.type_traits
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.rounding_mode
+ libc.src.__support.big_int
libc.src.__support.macros.optimization
libc.src.__support.uint128
)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 706bfc4b08670..aa0069d821d0d 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -3609,6 +3609,7 @@ add_entrypoint_object(
HDRS
../f16fmaf.h
DEPENDS
+ libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.fma
COMPILE_OPTIONS
-O3
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index c8c0506164bb2..9221b4a457454 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -10,7 +10,6 @@
#define LLVM_LIBC_UTILS_MPFRWRAPPER_MPFRUTILS_H
#include "src/__support/CPP/type_traits.h"
-#include "src/__support/CPP/type_traits/type_identity.h"
#include "test/UnitTest/RoundingModeUtils.h"
#include "test/UnitTest/Test.h"
>From b112ac4a94b2969bf400ee2a156bcba9d5c27b7f Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Fri, 14 Jun 2024 01:19:47 +0200
Subject: [PATCH 04/11] [libc] Add blank line between template declaration and
specialization
---
libc/utils/MPFRWrapper/MPFRUtils.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index 9221b4a457454..0b4f42a72ec81 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -138,6 +138,7 @@ template <typename T> struct IsTernaryInput<TernaryInput<T>> {
};
template <typename T> struct MakeScalarInput : cpp::type_identity<T> {};
+
template <typename T>
struct MakeScalarInput<TernaryInput<T>> : cpp::type_identity<T> {};
>From 7e7c3542e7c1730502bc761db5e0236fa8bd24e7 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Fri, 14 Jun 2024 15:10:27 +0200
Subject: [PATCH 05/11] [libc][math] Remove useless header guard around
fputil::generic::fma<float>
---
libc/src/__support/FPUtil/generic/FMA.h | 2 --
1 file changed, 2 deletions(-)
diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h
index 3bbb30476e5d8..4cbbe9cbb81e7 100644
--- a/libc/src/__support/FPUtil/generic/FMA.h
+++ b/libc/src/__support/FPUtil/generic/FMA.h
@@ -31,7 +31,6 @@ LIBC_INLINE cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
OutType>
fma(InType x, InType y, InType z);
-#ifndef LIBC_TARGET_CPU_HAS_FMA
// TODO(lntue): Implement fmaf that is correctly rounded to all rounding modes.
// The implementation below only is only correct for the default rounding mode,
// round-to-nearest tie-to-even.
@@ -82,7 +81,6 @@ template <> LIBC_INLINE float fma<float>(float x, float y, float z) {
return static_cast<float>(bit_sum.get_val());
}
-#endif // LIBC_TARGET_CPU_HAS_FMA
namespace internal {
>From e409c828618637891af4ae4bffec2d03a6298a51 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Fri, 14 Jun 2024 15:38:31 +0200
Subject: [PATCH 06/11] [libc] Misc cleanup
---
libc/spec/stdc.td | 5 ++---
libc/src/__support/FPUtil/generic/FMA.h | 2 +-
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index b089b596b0958..b7375fb411220 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -474,10 +474,11 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"fmul", RetValSpec<FloatType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
-
FunctionSpec<"fma", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fmaf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>, ArgSpec<FloatType>]>,
+ GuardedFunctionSpec<"f16fmaf", RetValSpec<Float16Type>, [ArgSpec<FloatType>, ArgSpec<FloatType>, ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
+
FunctionSpec<"fmod", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
FunctionSpec<"fmodf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
FunctionSpec<"fmodl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
@@ -715,8 +716,6 @@ def StdC : StandardSpec<"stdc"> {
GuardedFunctionSpec<"totalordermagf16", RetValSpec<IntType>, [ArgSpec<Float16Ptr>, ArgSpec<Float16Ptr>], "LIBC_TYPES_HAS_FLOAT16">,
- GuardedFunctionSpec<"f16fmaf", RetValSpec<Float16Type>, [ArgSpec<FloatType>, ArgSpec<FloatType>, ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
-
GuardedFunctionSpec<"f16sqrtf", RetValSpec<Float16Type>, [ArgSpec<FloatType>], "LIBC_TYPES_HAS_FLOAT16">,
]
>;
diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h
index 4cbbe9cbb81e7..d954006f8de69 100644
--- a/libc/src/__support/FPUtil/generic/FMA.h
+++ b/libc/src/__support/FPUtil/generic/FMA.h
@@ -113,7 +113,7 @@ fma(InType x, InType y, InType z) {
using InStorageType = typename InFPBits::StorageType;
constexpr int IN_EXPLICIT_MANT_LEN = InFPBits::FRACTION_LEN + 1;
- constexpr size_t PROD_LEN = 2 * (IN_EXPLICIT_MANT_LEN);
+ constexpr size_t PROD_LEN = 2 * IN_EXPLICIT_MANT_LEN;
constexpr size_t TMP_RESULT_LEN = cpp::bit_ceil(PROD_LEN + 1);
using TmpResultType = UInt<TMP_RESULT_LEN>;
>From bd971fe9e52be5a5059a9b9c5f51b70c6569f3a8 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Fri, 14 Jun 2024 15:51:01 +0200
Subject: [PATCH 07/11] [libc] Replace 128-bit BigInt numeric_limits with
generic BigInt specialization
---
libc/src/__support/big_int.h | 27 +++++++++------------------
1 file changed, 9 insertions(+), 18 deletions(-)
diff --git a/libc/src/__support/big_int.h b/libc/src/__support/big_int.h
index 27cb3b783470c..c30c3ece54a30 100644
--- a/libc/src/__support/big_int.h
+++ b/libc/src/__support/big_int.h
@@ -986,26 +986,15 @@ using Int = BigInt<Bits, true, internal::WordTypeSelectorT<Bits>>;
// Provides limits of BigInt.
template <size_t Bits, bool Signed, typename T>
struct cpp::numeric_limits<BigInt<Bits, Signed, T>> {
- LIBC_INLINE_VAR static constexpr int digits = Bits - Signed;
-};
-
-// Provides limits of U/Int<128>.
-template <> class cpp::numeric_limits<UInt<128>> {
-public:
- LIBC_INLINE static constexpr UInt<128> max() { return UInt<128>::max(); }
- LIBC_INLINE static constexpr UInt<128> min() { return UInt<128>::min(); }
- // Meant to match std::numeric_limits interface.
- // NOLINTNEXTLINE(readability-identifier-naming)
- LIBC_INLINE_VAR static constexpr int digits = 128;
-};
-
-template <> class cpp::numeric_limits<Int<128>> {
-public:
- LIBC_INLINE static constexpr Int<128> max() { return Int<128>::max(); }
- LIBC_INLINE static constexpr Int<128> min() { return Int<128>::min(); }
+ LIBC_INLINE static constexpr BigInt<Bits, Signed, T> max() {
+ return BigInt<Bits, Signed, T>::max();
+ }
+ LIBC_INLINE static constexpr BigInt<Bits, Signed, T> min() {
+ return BigInt<Bits, Signed, T>::min();
+ }
// Meant to match std::numeric_limits interface.
// NOLINTNEXTLINE(readability-identifier-naming)
- LIBC_INLINE_VAR static constexpr int digits = 128;
+ LIBC_INLINE_VAR static constexpr int digits = Bits - Signed;
};
// type traits to determine whether a T is a BigInt.
@@ -1086,6 +1075,8 @@ struct is_unsigned_integral_or_big_int
cpp::is_same_v<T, make_integral_or_big_int_unsigned_t<T>>> {};
template <typename T>
+// Meant to look like <type_traits> helper variable templates.
+// NOLINTNEXTLINE(readability-identifier-naming)
LIBC_INLINE_VAR constexpr bool is_unsigned_integral_or_big_int_v =
is_unsigned_integral_or_big_int<T>::value;
>From 5dae03b46e367836b4403049ef4edb5d71112646 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Fri, 14 Jun 2024 17:17:12 +0200
Subject: [PATCH 08/11] [libc][math] Fix sign of FMA with exact cancellation
---
libc/src/__support/FPUtil/generic/FMA.h | 11 ++++++++---
libc/test/src/math/smoke/FmaTest.h | 24 +++++++++++++++++++++---
2 files changed, 29 insertions(+), 6 deletions(-)
diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h
index d954006f8de69..754efc0f5c21d 100644
--- a/libc/src/__support/FPUtil/generic/FMA.h
+++ b/libc/src/__support/FPUtil/generic/FMA.h
@@ -231,6 +231,8 @@ fma(InType x, InType y, InType z) {
OutStorageType result = 0;
int r_exp = 0; // Unbiased exponent of the result
+ int round_mode = fputil::quick_get_round();
+
// Normalize the result.
if (prod_mant != 0) {
int lead_zeros = cpp::countl_zero(prod_mant);
@@ -269,12 +271,15 @@ fma(InType x, InType y, InType z) {
r_exp = 0;
}
} else {
- // Return +0.0 when there is exact cancellation, i.e., x*y == -z exactly.
- prod_sign = Sign::POS;
+ // When there is exact cancellation, i.e., x*y == -z exactly, return -0.0 if
+ // rounding downward and +0.0 for other rounding modes.
+ if (round_mode == FE_DOWNWARD)
+ prod_sign = Sign::NEG;
+ else
+ prod_sign = Sign::POS;
}
// Finalize the result.
- int round_mode = fputil::quick_get_round();
if (LIBC_UNLIKELY(r_exp >= OutFPBits::MAX_BIASED_EXPONENT)) {
if ((round_mode == FE_TOWARDZERO) ||
(round_mode == FE_UPWARD && prod_sign.is_neg()) ||
diff --git a/libc/test/src/math/smoke/FmaTest.h b/libc/test/src/math/smoke/FmaTest.h
index 3edd3aceb4847..f942de37654dd 100644
--- a/libc/test/src/math/smoke/FmaTest.h
+++ b/libc/test/src/math/smoke/FmaTest.h
@@ -58,6 +58,7 @@ class FmaTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest {
EXPECT_FP_EQ(out.zero,
func(InType(0.5), in.min_denormal, in.min_denormal));
}
+
// Test underflow rounding down.
OutType v = OutFPBits(static_cast<OutStorageType>(OUT_MIN_NORMAL_U +
OutStorageType(1)))
@@ -73,12 +74,29 @@ class FmaTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest {
out.min_normal,
func(InType(1) / InType(IN_MIN_NORMAL_U << 1), v, out.min_normal));
}
+
// Test overflow.
OutType z = out.max_normal;
- EXPECT_FP_EQ(OutType(0.75) * z, func(InType(1.75), z, -z));
+ EXPECT_FP_EQ_ALL_ROUNDING(OutType(0.75) * z, func(InType(1.75), z, -z));
+
// Exact cancellation.
- EXPECT_FP_EQ(out.zero, func(InType(3.0), InType(5.0), -InType(15.0)));
- EXPECT_FP_EQ(out.zero, func(InType(-3.0), InType(5.0), InType(15.0)));
+ EXPECT_FP_EQ_ROUNDING_NEAREST(
+ out.zero, func(InType(3.0), InType(5.0), InType(-15.0)));
+ EXPECT_FP_EQ_ROUNDING_UPWARD(out.zero,
+ func(InType(3.0), InType(5.0), InType(-15.0)));
+ EXPECT_FP_EQ_ROUNDING_TOWARD_ZERO(
+ out.zero, func(InType(3.0), InType(5.0), InType(-15.0)));
+ EXPECT_FP_EQ_ROUNDING_DOWNWARD(
+ out.neg_zero, func(InType(3.0), InType(5.0), InType(-15.0)));
+
+ EXPECT_FP_EQ_ROUNDING_NEAREST(
+ out.zero, func(InType(-3.0), InType(5.0), InType(15.0)));
+ EXPECT_FP_EQ_ROUNDING_UPWARD(out.zero,
+ func(InType(-3.0), InType(5.0), InType(15.0)));
+ EXPECT_FP_EQ_ROUNDING_TOWARD_ZERO(
+ out.zero, func(InType(-3.0), InType(5.0), InType(15.0)));
+ EXPECT_FP_EQ_ROUNDING_DOWNWARD(
+ out.neg_zero, func(InType(-3.0), InType(5.0), InType(15.0)));
}
};
>From 9d4dc22bb68c02a43123cd8fc6732cf0bd9eac7f Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Fri, 14 Jun 2024 17:34:32 +0200
Subject: [PATCH 09/11] [libc] Remove braces from single-statement ifs as per
LLVM code style
---
libc/src/__support/FPUtil/generic/FMA.h | 11 ++++-------
1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h
index 754efc0f5c21d..950db8c1ac671 100644
--- a/libc/src/__support/FPUtil/generic/FMA.h
+++ b/libc/src/__support/FPUtil/generic/FMA.h
@@ -71,11 +71,10 @@ template <> LIBC_INLINE float fma<float>(float x, float y, float z) {
// Update sticky bits if t != 0.0 and the least (52 - 23 - 1 = 28) bits are
// zero.
if (!t.is_zero() && ((bit_sum.get_mantissa() & 0xfff'ffffULL) == 0)) {
- if (bit_sum.sign() != t.sign()) {
+ if (bit_sum.sign() != t.sign())
bit_sum.set_mantissa(bit_sum.get_mantissa() + 1);
- } else if (bit_sum.get_mantissa()) {
+ else if (bit_sum.get_mantissa())
bit_sum.set_mantissa(bit_sum.get_mantissa() - 1);
- }
}
}
@@ -122,9 +121,8 @@ fma(InType x, InType y, InType z) {
constexpr TmpResultType EXTRA_FRACTION_STICKY_MASK =
(TmpResultType(1) << (EXTRA_FRACTION_LEN - 1)) - 1;
- if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0)) {
+ if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0))
return static_cast<OutType>(x * y + z);
- }
int x_exp = 0;
int y_exp = 0;
@@ -293,9 +291,8 @@ fma(InType x, InType y, InType z) {
result = static_cast<OutStorageType>(
(result & OutFPBits::FRACTION_MASK) |
(static_cast<OutStorageType>(r_exp) << OutFPBits::FRACTION_LEN));
- if (prod_sign.is_neg()) {
+ if (prod_sign.is_neg())
result |= OutFPBits::SIGN_MASK;
- }
// Rounding.
if (round_mode == FE_TONEAREST) {
>From eb0ff6bb8809f47358be7c0dece4f212b365ffb9 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Fri, 14 Jun 2024 17:36:07 +0200
Subject: [PATCH 10/11] [libc][math] Remove test_special_numbers from FMA MPFR
unit test duplicated from smoke test
---
libc/test/src/math/FmaTest.h | 41 -----------------------------
libc/test/src/math/f16fmaf_test.cpp | 4 ---
libc/test/src/math/fma_test.cpp | 4 ---
libc/test/src/math/fmaf_test.cpp | 4 ---
4 files changed, 53 deletions(-)
diff --git a/libc/test/src/math/FmaTest.h b/libc/test/src/math/FmaTest.h
index 0e0f05aad5d13..53895e7d633c2 100644
--- a/libc/test/src/math/FmaTest.h
+++ b/libc/test/src/math/FmaTest.h
@@ -59,47 +59,6 @@ class FmaTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest {
public:
using FmaFunc = OutType (*)(InType, InType, InType);
- void test_special_numbers(FmaFunc func) {
- EXPECT_FP_EQ(out.zero, func(in.zero, in.zero, in.zero));
- EXPECT_FP_EQ(out.neg_zero, func(in.zero, in.neg_zero, in.neg_zero));
- EXPECT_FP_EQ(out.inf, func(in.inf, in.inf, in.zero));
- EXPECT_FP_EQ(out.neg_inf, func(in.neg_inf, in.inf, in.neg_inf));
- EXPECT_FP_EQ(out.aNaN, func(in.inf, in.zero, in.zero));
- EXPECT_FP_EQ(out.aNaN, func(in.inf, in.neg_inf, in.inf));
- EXPECT_FP_EQ(out.aNaN, func(in.aNaN, in.zero, in.inf));
- EXPECT_FP_EQ(out.aNaN, func(in.inf, in.neg_inf, in.aNaN));
-
- // Test underflow rounding up.
- EXPECT_FP_EQ(OutFPBits(OutStorageType(2)).get_val(),
- func(OutType(0.5), out.min_denormal, out.min_denormal));
-
- if constexpr (sizeof(OutType) < sizeof(InType)) {
- EXPECT_FP_EQ(out.zero,
- func(InType(0.5), in.min_denormal, in.min_denormal));
- }
- // Test underflow rounding down.
- OutType v = OutFPBits(static_cast<OutStorageType>(OUT_MIN_NORMAL_U +
- OutStorageType(1)))
- .get_val();
- EXPECT_FP_EQ(v, func(OutType(1) / OutType(OUT_MIN_NORMAL_U << 1), v,
- out.min_normal));
-
- if constexpr (sizeof(OutType) < sizeof(InType)) {
- InType v = InFPBits(static_cast<InStorageType>(IN_MIN_NORMAL_U +
- InStorageType(1)))
- .get_val();
- EXPECT_FP_EQ(
- out.min_normal,
- func(InType(1) / InType(IN_MIN_NORMAL_U << 1), v, out.min_normal));
- }
- // Test overflow.
- OutType z = out.max_normal;
- EXPECT_FP_EQ(OutType(0.75) * z, func(InType(1.75), z, -z));
- // Exact cancellation.
- EXPECT_FP_EQ(out.zero, func(InType(3.0), InType(5.0), -InType(15.0)));
- EXPECT_FP_EQ(out.zero, func(InType(-3.0), InType(5.0), InType(15.0)));
- }
-
void test_subnormal_range(FmaFunc func) {
constexpr InStorageType COUNT = 100'001;
constexpr InStorageType STEP =
diff --git a/libc/test/src/math/f16fmaf_test.cpp b/libc/test/src/math/f16fmaf_test.cpp
index dc197e297c11e..e4ca88b8810e1 100644
--- a/libc/test/src/math/f16fmaf_test.cpp
+++ b/libc/test/src/math/f16fmaf_test.cpp
@@ -12,10 +12,6 @@
using LlvmLibcF16fmafTest = FmaTestTemplate<float16, float>;
-TEST_F(LlvmLibcF16fmafTest, SpecialNumbers) {
- test_special_numbers(&LIBC_NAMESPACE::f16fmaf);
-}
-
TEST_F(LlvmLibcF16fmafTest, SubnormalRange) {
test_subnormal_range(&LIBC_NAMESPACE::f16fmaf);
}
diff --git a/libc/test/src/math/fma_test.cpp b/libc/test/src/math/fma_test.cpp
index 20224d99894be..dd761382631d5 100644
--- a/libc/test/src/math/fma_test.cpp
+++ b/libc/test/src/math/fma_test.cpp
@@ -276,10 +276,6 @@ struct LlvmLibcFmaTest : public FmaTestTemplate<double> {
}
};
-TEST_F(LlvmLibcFmaTest, SpecialNumbers) {
- test_special_numbers(&LIBC_NAMESPACE::fma);
-}
-
TEST_F(LlvmLibcFmaTest, SubnormalRange) {
test_subnormal_range(&LIBC_NAMESPACE::fma);
}
diff --git a/libc/test/src/math/fmaf_test.cpp b/libc/test/src/math/fmaf_test.cpp
index b607d4a66f8eb..0e498d46ecfb0 100644
--- a/libc/test/src/math/fmaf_test.cpp
+++ b/libc/test/src/math/fmaf_test.cpp
@@ -12,10 +12,6 @@
using LlvmLibcFmafTest = FmaTestTemplate<float>;
-TEST_F(LlvmLibcFmafTest, SpecialNumbers) {
- test_special_numbers(&LIBC_NAMESPACE::fmaf);
-}
-
TEST_F(LlvmLibcFmafTest, SubnormalRange) {
test_subnormal_range(&LIBC_NAMESPACE::fmaf);
}
>From f3128ee0c2262f81a6813f3a486699cc26a7b585 Mon Sep 17 00:00:00 2001
From: OverMighty <its.overmighty at gmail.com>
Date: Fri, 14 Jun 2024 17:56:05 +0200
Subject: [PATCH 11/11] [libc][math] Update comments in fputil::generic::fma
---
libc/src/__support/FPUtil/generic/FMA.h | 17 ++++++-----------
1 file changed, 6 insertions(+), 11 deletions(-)
diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h
index 950db8c1ac671..eafe7c31e6625 100644
--- a/libc/src/__support/FPUtil/generic/FMA.h
+++ b/libc/src/__support/FPUtil/generic/FMA.h
@@ -168,17 +168,12 @@ fma(InType x, InType y, InType z) {
// z : 10aa...a
// - prod : 1bb...bb....b
// In that case, in order to store the exact result, we need at least
- // (Length of prod) - (MantissaLength of z) = 2*(52 + 1) - 52 = 54.
- // TODO: 53? (Explicit mantissa.) ^
+ // (Length of prod) - (Fraction length of z) = 2*(52 + 1) - 52 = 54.
// Overall, before aligning the mantissas and exponents, we can simply left-
// shift the mantissa of z by at least 54, and left-shift the product of x*y
- // by (that amount - 52). After that, it is enough to align the least
- // TODO: ^ 54?
+ // by (that amount - 54). After that, it is enough to align the least
// significant bit, given that we keep track of the round and sticky bits
// after the least significant bit.
- // We pick shifting z_mant by 64 bits so that technically we can simply use
- // the original mantissa as high part when constructing 128-bit z_mant. So the
- // mantissa of prod will be left-shifted by 64 - 54 = 10 initially.
TmpResultType prod_mant = TmpResultType(x_mant) * y_mant;
int prod_lsb_exp =
@@ -241,10 +236,10 @@ fma(InType x, InType y, InType z) {
InFPBits::EXP_BIAS + OutFPBits::EXP_BIAS;
if (r_exp > 0) {
- // The result is normal. We will shift the mantissa to the right by
- // 63 - 52 = 11 bits (from the locations of the most significant bit).
- // Then the rounding bit will correspond the 11th bit, and the lowest
- // 10 bits are merged into sticky bits.
+ // The result is normal. We will shift the mantissa to the right by the
+ // amount of extra bits compared to the length of the explicit mantissa in
+ // the output type. The rounding bit then becomes the highest bit that is
+ // shifted out, and the following lower bits are merged into sticky bits.
round_bit =
(prod_mant & (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1))) != 0;
sticky_bits |= (prod_mant & EXTRA_FRACTION_STICKY_MASK) != 0;
More information about the libc-commits
mailing list