[libc-commits] [libc] [libc][math][c23] Add {ldexp, scalbn, scalbln}f16 C23 math functions (PR #94797)

Mon Jun 10 11:13:23 PDT 2024

================
@@ -183,6 +184,10 @@ template <size_t Bits> struct DyadicFloat {
     return r;
   }
 
+  LIBC_INLINE explicit constexpr operator float16() const {
+    return static_cast<float16>(static_cast<float>(*this));
+  }
----------------
overmighty wrote:

> Once `DyadicFloat` can support `uint16_t` base type, the current `operator T()` implementation should work correctly.

It still doesn't work:

```
[ RUN      ] LlvmLibcLdExpTest.NormalOperation
/home/overmighty/projects/llvm-project/libc/test/src/math/smoke/LdExpTest.h:128: FAILURE
Failed to match x * static_cast<T>(two_to_exp) against LIBC_NAMESPACE::testing::getMatcher< LIBC_NAMESPACE::testing::TestCond::EQ>(func(x, exp)).
Expected floating point value: 0x1920 = (S: 0, E: 0x0006, M: 0x0120)
Actual floating point value: 0x1923 = (S: 0, E: 0x0006, M: 0x0123)
[  FAILED  ] LlvmLibcLdExpTest.NormalOperation
```

I deleted `operator float16()` and undid the `DyadicFloat<cpp::max(FPBits<T>::STORAGE_LEN, 32)> normal(bits.get_val());` change. I had to add casts to `StorageType` (`output_bits_t`) in `DyadicFloat`.

Here's the full `git diff`:

```diff

diff --git a/libc/src/__support/FPUtil/ManipulationFunctions.h b/libc/src/__support/FPUtil/ManipulationFunctions.h
index a9c64122d931..97c43126a71a 100644
--- a/libc/src/__support/FPUtil/ManipulationFunctions.h
+++ b/libc/src/__support/FPUtil/ManipulationFunctions.h
@@ -142,8 +142,10 @@ LIBC_INLINE constexpr T logb(T x) {
   return static_cast<T>(normal.get_unbiased_exponent());
 }
 
-template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
-LIBC_INLINE constexpr T ldexp(T x, long exp) {
+template <typename T, typename U>
+LIBC_INLINE constexpr cpp::enable_if_t<
+    cpp::is_floating_point_v<T> && cpp::is_integral_v<U>, T>
+ldexp(T x, U exp) {
   FPBits<T> bits(x);
   if (LIBC_UNLIKELY((exp == 0) || bits.is_zero() || bits.is_inf_or_nan()))
     return x;
@@ -156,6 +158,8 @@ LIBC_INLINE constexpr T ldexp(T x, long exp) {
   // calculating the limit.
   constexpr int EXP_LIMIT =
       FPBits<T>::MAX_BIASED_EXPONENT + FPBits<T>::FRACTION_LEN + 1;
+  // Make sure that we can safely cast exp to int when not returning early.
+  static_assert(EXP_LIMIT <= INT_MAX && -EXP_LIMIT >= INT_MIN);
   if (LIBC_UNLIKELY(exp > EXP_LIMIT)) {
     int rounding_mode = quick_get_round();
     Sign sign = bits.sign();
@@ -185,9 +189,7 @@ LIBC_INLINE constexpr T ldexp(T x, long exp) {
   }
 
   // For all other values, NormalFloat to T conversion handles it the right way.
-  DyadicFloat<cpp::max(FPBits<T>::STORAGE_LEN, 32)> normal(bits.get_val());
-  // Make sure that exp fits into an int when not taking the fast paths above.
-  static_assert(EXP_LIMIT <= INT_MAX && -EXP_LIMIT >= INT_MIN);
+  DyadicFloat<FPBits<T>::STORAGE_LEN> normal(bits.get_val());
   normal.exponent += static_cast<int>(exp);
   return static_cast<T>(normal);
 }
diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
index f1791f9da56a..f1b58e39ad9b 100644
--- a/libc/src/__support/FPUtil/dyadic_float.h
+++ b/libc/src/__support/FPUtil/dyadic_float.h
@@ -14,7 +14,6 @@
 #include "src/__support/CPP/type_traits.h"
 #include "src/__support/big_int.h"
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/__support/macros/properties/types.h" // float16
 
 #include <stddef.h>
 
@@ -127,7 +126,7 @@ template <size_t Bits> struct DyadicFloat {
         shift >= MantissaType::BITS ? MantissaType(0) : mantissa >> shift;
 
     T d_hi = FPBits<T>::create_value(
-                 sign, exp_hi,
+                 sign, static_cast<output_bits_t>(exp_hi),
                  (static_cast<output_bits_t>(m_hi) & FPBits<T>::SIG_MASK) |
                      IMPLICIT_MASK)
                  .get_val();
@@ -146,23 +145,30 @@ template <size_t Bits> struct DyadicFloat {
       // d_lo is denormal, but the output is normal.
       int scale_up_exponent = 2 * PRECISION;
       T scale_up_factor =
-          FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS + scale_up_exponent,
+          FPBits<T>::create_value(sign,
+                                  static_cast<output_bits_t>(
+                                      FPBits<T>::EXP_BIAS + scale_up_exponent),
                                   IMPLICIT_MASK)
               .get_val();
       T scale_down_factor =
-          FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS - scale_up_exponent,
+          FPBits<T>::create_value(sign,
+                                  static_cast<output_bits_t>(
+                                      FPBits<T>::EXP_BIAS - scale_up_exponent),
                                   IMPLICIT_MASK)
               .get_val();
 
-      d_lo = FPBits<T>::create_value(sign, exp_lo + scale_up_exponent,
-                                     IMPLICIT_MASK)
+      d_lo = FPBits<T>::create_value(
+                 sign, static_cast<output_bits_t>(exp_lo + scale_up_exponent),
+                 IMPLICIT_MASK)
                  .get_val();
 
       return multiply_add(d_lo, T(round_and_sticky), d_hi * scale_up_factor) *
              scale_down_factor;
     }
 
-    d_lo = FPBits<T>::create_value(sign, exp_lo, IMPLICIT_MASK).get_val();
+    d_lo = FPBits<T>::create_value(sign, static_cast<output_bits_t>(exp_lo),
+                                   IMPLICIT_MASK)
+               .get_val();
 
     // Still correct without FMA instructions if `d_lo` is not underflow.
     T r = multiply_add(d_lo, T(round_and_sticky), d_hi);
@@ -170,7 +176,8 @@ template <size_t Bits> struct DyadicFloat {
     if (LIBC_UNLIKELY(denorm)) {
       // Exponent before rounding is in denormal range, simply clear the
       // exponent field.
-      output_bits_t clear_exp = (output_bits_t(exp_hi) << FPBits<T>::SIG_LEN);
+      output_bits_t clear_exp = static_cast<output_bits_t>(
+          output_bits_t(exp_hi) << FPBits<T>::SIG_LEN);
       output_bits_t r_bits = FPBits<T>(r).uintval() - clear_exp;
       if (!(r_bits & FPBits<T>::EXP_MASK)) {
         // Output is denormal after rounding, clear the implicit bit for 80-bit
@@ -184,10 +191,6 @@ template <size_t Bits> struct DyadicFloat {
     return r;
   }
 
-  LIBC_INLINE explicit constexpr operator float16() const {
-    return static_cast<float16>(static_cast<float>(*this));
-  }
-
   LIBC_INLINE explicit constexpr operator MantissaType() const {
     if (mantissa.is_zero())
       return 0;
diff --git a/libc/src/__support/big_int.h b/libc/src/__support/big_int.h
index 40ad6eeed7ac..aa2ab1e054d8 100644
--- a/libc/src/__support/big_int.h
+++ b/libc/src/__support/big_int.h
@@ -302,7 +302,8 @@ LIBC_INLINE constexpr cpp::array<word, N> shift(cpp::array<word, N> array,
       dst = static_cast<word>((part1 << bit_offset) |
                               (part2 >> (WORD_BITS - bit_offset)));
     else
-      dst = (part1 >> bit_offset) | (part2 << (WORD_BITS - bit_offset));
+      dst = static_cast<word>((part1 >> bit_offset) |
+                              (part2 << (WORD_BITS - bit_offset)));
   }
   return out;
 }
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index def5a1a755c6..6d187fd43a32 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1494,7 +1494,7 @@ add_entrypoint_object(
   HDRS
     ../ldexpf16.h
   COMPILE_OPTIONS
-    -O0 -ggdb3
+    -O3
   DEPENDS
     libc.src.__support.macros.properties.types
     libc.src.__support.FPUtil.manipulation_functions
diff --git a/libc/test/src/math/smoke/LdExpTest.h b/libc/test/src/math/smoke/LdExpTest.h
index 231535a9a505..7739bd71e5e7 100644
--- a/libc/test/src/math/smoke/LdExpTest.h
+++ b/libc/test/src/math/smoke/LdExpTest.h
@@ -71,7 +71,7 @@ public:
 
   void testOverflow(LdExpFunc func) {
     NormalFloat x(Sign::POS, FPBits::MAX_BIASED_EXPONENT - 10,
-                  NormalFloat::ONE + 0xF00);
+                  NormalFloat::ONE + 0xFB);
     for (int32_t exp = 10; exp < 100; ++exp) {
       ASSERT_FP_EQ(inf, func(T(x), exp));
       ASSERT_FP_EQ(neg_inf, func(-T(x), exp));
```

https://github.com/llvm/llvm-project/pull/94797