[libc-commits] [libc] [libc][math][c23] Add {ldexp, scalbn, scalbln}f16 C23 math functions (PR #94797)
via libc-commits
libc-commits at lists.llvm.org
Mon Jun 10 11:13:23 PDT 2024
================
@@ -183,6 +184,10 @@ template <size_t Bits> struct DyadicFloat {
return r;
}
+ LIBC_INLINE explicit constexpr operator float16() const {
+ return static_cast<float16>(static_cast<float>(*this));
+ }
----------------
overmighty wrote:
> Once `DyadicFloat` can support `uint16_t` base type, the current `operator T()` implementation should work correctly.
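It still doesn't work. The `LdExpTest` smoke test `NormalOperation` fails with a mismatch in the low mantissa bits (expected `0x0120`, got `0x0123`):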
```
[ RUN ] LlvmLibcLdExpTest.NormalOperation
/home/overmighty/projects/llvm-project/libc/test/src/math/smoke/LdExpTest.h:128: FAILURE
Failed to match x * static_cast<T>(two_to_exp) against LIBC_NAMESPACE::testing::getMatcher< LIBC_NAMESPACE::testing::TestCond::EQ>(func(x, exp)).
Expected floating point value: 0x1920 = (S: 0, E: 0x0006, M: 0x0120)
Actual floating point value: 0x1923 = (S: 0, E: 0x0006, M: 0x0123)
[ FAILED ] LlvmLibcLdExpTest.NormalOperation
```
To test this, I deleted `operator float16()` and reverted the `DyadicFloat<cpp::max(FPBits<T>::STORAGE_LEN, 32)> normal(bits.get_val());` change, so `normal` is a `DyadicFloat<FPBits<T>::STORAGE_LEN>` again. I also had to add casts to `StorageType` (`output_bits_t`) in `DyadicFloat`.
Here's the full `git diff`:
```diff
diff --git a/libc/src/__support/FPUtil/ManipulationFunctions.h b/libc/src/__support/FPUtil/ManipulationFunctions.h
index a9c64122d931..97c43126a71a 100644
--- a/libc/src/__support/FPUtil/ManipulationFunctions.h
+++ b/libc/src/__support/FPUtil/ManipulationFunctions.h
@@ -142,8 +142,10 @@ LIBC_INLINE constexpr T logb(T x) {
return static_cast<T>(normal.get_unbiased_exponent());
}
-template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
-LIBC_INLINE constexpr T ldexp(T x, long exp) {
+template <typename T, typename U>
+LIBC_INLINE constexpr cpp::enable_if_t<
+ cpp::is_floating_point_v<T> && cpp::is_integral_v<U>, T>
+ldexp(T x, U exp) {
FPBits<T> bits(x);
if (LIBC_UNLIKELY((exp == 0) || bits.is_zero() || bits.is_inf_or_nan()))
return x;
@@ -156,6 +158,8 @@ LIBC_INLINE constexpr T ldexp(T x, long exp) {
// calculating the limit.
constexpr int EXP_LIMIT =
FPBits<T>::MAX_BIASED_EXPONENT + FPBits<T>::FRACTION_LEN + 1;
+ // Make sure that we can safely cast exp to int when not returning early.
+ static_assert(EXP_LIMIT <= INT_MAX && -EXP_LIMIT >= INT_MIN);
if (LIBC_UNLIKELY(exp > EXP_LIMIT)) {
int rounding_mode = quick_get_round();
Sign sign = bits.sign();
@@ -185,9 +189,7 @@ LIBC_INLINE constexpr T ldexp(T x, long exp) {
}
// For all other values, NormalFloat to T conversion handles it the right way.
- DyadicFloat<cpp::max(FPBits<T>::STORAGE_LEN, 32)> normal(bits.get_val());
- // Make sure that exp fits into an int when not taking the fast paths above.
- static_assert(EXP_LIMIT <= INT_MAX && -EXP_LIMIT >= INT_MIN);
+ DyadicFloat<FPBits<T>::STORAGE_LEN> normal(bits.get_val());
normal.exponent += static_cast<int>(exp);
return static_cast<T>(normal);
}
diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
index f1791f9da56a..f1b58e39ad9b 100644
--- a/libc/src/__support/FPUtil/dyadic_float.h
+++ b/libc/src/__support/FPUtil/dyadic_float.h
@@ -14,7 +14,6 @@
#include "src/__support/CPP/type_traits.h"
#include "src/__support/big_int.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/__support/macros/properties/types.h" // float16
#include <stddef.h>
@@ -127,7 +126,7 @@ template <size_t Bits> struct DyadicFloat {
shift >= MantissaType::BITS ? MantissaType(0) : mantissa >> shift;
T d_hi = FPBits<T>::create_value(
- sign, exp_hi,
+ sign, static_cast<output_bits_t>(exp_hi),
(static_cast<output_bits_t>(m_hi) & FPBits<T>::SIG_MASK) |
IMPLICIT_MASK)
.get_val();
@@ -146,23 +145,30 @@ template <size_t Bits> struct DyadicFloat {
// d_lo is denormal, but the output is normal.
int scale_up_exponent = 2 * PRECISION;
T scale_up_factor =
- FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS + scale_up_exponent,
+ FPBits<T>::create_value(sign,
+ static_cast<output_bits_t>(
+ FPBits<T>::EXP_BIAS + scale_up_exponent),
IMPLICIT_MASK)
.get_val();
T scale_down_factor =
- FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS - scale_up_exponent,
+ FPBits<T>::create_value(sign,
+ static_cast<output_bits_t>(
+ FPBits<T>::EXP_BIAS - scale_up_exponent),
IMPLICIT_MASK)
.get_val();
- d_lo = FPBits<T>::create_value(sign, exp_lo + scale_up_exponent,
- IMPLICIT_MASK)
+ d_lo = FPBits<T>::create_value(
+ sign, static_cast<output_bits_t>(exp_lo + scale_up_exponent),
+ IMPLICIT_MASK)
.get_val();
return multiply_add(d_lo, T(round_and_sticky), d_hi * scale_up_factor) *
scale_down_factor;
}
- d_lo = FPBits<T>::create_value(sign, exp_lo, IMPLICIT_MASK).get_val();
+ d_lo = FPBits<T>::create_value(sign, static_cast<output_bits_t>(exp_lo),
+ IMPLICIT_MASK)
+ .get_val();
// Still correct without FMA instructions if `d_lo` is not underflow.
T r = multiply_add(d_lo, T(round_and_sticky), d_hi);
@@ -170,7 +176,8 @@ template <size_t Bits> struct DyadicFloat {
if (LIBC_UNLIKELY(denorm)) {
// Exponent before rounding is in denormal range, simply clear the
// exponent field.
- output_bits_t clear_exp = (output_bits_t(exp_hi) << FPBits<T>::SIG_LEN);
+ output_bits_t clear_exp = static_cast<output_bits_t>(
+ output_bits_t(exp_hi) << FPBits<T>::SIG_LEN);
output_bits_t r_bits = FPBits<T>(r).uintval() - clear_exp;
if (!(r_bits & FPBits<T>::EXP_MASK)) {
// Output is denormal after rounding, clear the implicit bit for 80-bit
@@ -184,10 +191,6 @@ template <size_t Bits> struct DyadicFloat {
return r;
}
- LIBC_INLINE explicit constexpr operator float16() const {
- return static_cast<float16>(static_cast<float>(*this));
- }
-
LIBC_INLINE explicit constexpr operator MantissaType() const {
if (mantissa.is_zero())
return 0;
diff --git a/libc/src/__support/big_int.h b/libc/src/__support/big_int.h
index 40ad6eeed7ac..aa2ab1e054d8 100644
--- a/libc/src/__support/big_int.h
+++ b/libc/src/__support/big_int.h
@@ -302,7 +302,8 @@ LIBC_INLINE constexpr cpp::array<word, N> shift(cpp::array<word, N> array,
dst = static_cast<word>((part1 << bit_offset) |
(part2 >> (WORD_BITS - bit_offset)));
else
- dst = (part1 >> bit_offset) | (part2 << (WORD_BITS - bit_offset));
+ dst = static_cast<word>((part1 >> bit_offset) |
+ (part2 << (WORD_BITS - bit_offset)));
}
return out;
}
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index def5a1a755c6..6d187fd43a32 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1494,7 +1494,7 @@ add_entrypoint_object(
HDRS
../ldexpf16.h
COMPILE_OPTIONS
- -O0 -ggdb3
+ -O3
DEPENDS
libc.src.__support.macros.properties.types
libc.src.__support.FPUtil.manipulation_functions
diff --git a/libc/test/src/math/smoke/LdExpTest.h b/libc/test/src/math/smoke/LdExpTest.h
index 231535a9a505..7739bd71e5e7 100644
--- a/libc/test/src/math/smoke/LdExpTest.h
+++ b/libc/test/src/math/smoke/LdExpTest.h
@@ -71,7 +71,7 @@ public:
void testOverflow(LdExpFunc func) {
NormalFloat x(Sign::POS, FPBits::MAX_BIASED_EXPONENT - 10,
- NormalFloat::ONE + 0xF00);
+ NormalFloat::ONE + 0xFB);
for (int32_t exp = 10; exp < 100; ++exp) {
ASSERT_FP_EQ(inf, func(T(x), exp));
ASSERT_FP_EQ(neg_inf, func(-T(x), exp));
```
https://github.com/llvm/llvm-project/pull/94797
More information about the libc-commits mailing list