[libc-commits] [libc] [libc][NFC] Reuse `FloatProperties` constant instead of creating new ones (PR #75187)

Tue Dec 12 06:46:48 PST 2023

https://github.com/gchatelet updated https://github.com/llvm/llvm-project/pull/75187

>From 9ad65f3d639ef674036c9445f87905ba7760da80 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Tue, 12 Dec 2023 14:07:53 +0000
Subject: [PATCH 1/2] [libc][NFC] Various simplifications

Also exposes a bit more of FloatProperties and adds documentation.
This should have been several patches but I was lazy.
---
 libc/src/__support/FPUtil/FloatProperties.h   | 36 +++++++------
 .../__support/FPUtil/x86_64/LongDoubleBits.h  | 18 +------
 libc/src/__support/float_to_string.h          |  9 ++--
 libc/src/__support/str_to_float.h             | 52 ++++++++++---------
 4 files changed, 53 insertions(+), 62 deletions(-)

diff --git a/libc/src/__support/FPUtil/FloatProperties.h b/libc/src/__support/FPUtil/FloatProperties.h
index 7f396a649e4f58..b8ca6e5f2c8a8a 100644
--- a/libc/src/__support/FPUtil/FloatProperties.h
+++ b/libc/src/__support/FPUtil/FloatProperties.h
@@ -87,49 +87,55 @@ template <FPType fp_type>
 struct FPProperties : public internal::FPBaseProperties<fp_type> {
 private:
   using UP = internal::FPBaseProperties<fp_type>;
-  using UP::EXP_BITS;
-  using UP::SIG_BITS;
-  using UP::TOTAL_BITS;
+  // The number of bits to represent sign. For documentation purposes, always 1.
+  LIBC_INLINE_VAR static constexpr int SIGN_BITS = 1;
+  using UP::EXP_BITS;   // The number of bits for the *exponent* part
+  using UP::SIG_BITS;   // The number of bits for the *significand* part
+  using UP::TOTAL_BITS; // For convenience, the sum of `SIG_BITS`, `EXP_BITS`,
+                        // and `SIGN_BITS`.
+  static_assert(SIGN_BITS + EXP_BITS + SIG_BITS == TOTAL_BITS);
 
 public:
   using UIntType = typename UP::UIntType;
 
-private:
-  LIBC_INLINE_VAR static constexpr int STORAGE_BITS =
+  // The number of bits in UIntType.
+  LIBC_INLINE_VAR static constexpr int UINTTYPE_BITS =
       sizeof(UIntType) * CHAR_BIT;
-  static_assert(STORAGE_BITS >= TOTAL_BITS);
-
-  // The number of bits to represent sign.
-  // For documentation purpose, always 1.
-  LIBC_INLINE_VAR static constexpr int SIGN_BITS = 1;
-  static_assert(SIGN_BITS + EXP_BITS + SIG_BITS == TOTAL_BITS);
+  static_assert(UINTTYPE_BITS >= TOTAL_BITS);
 
+private:
   // The exponent bias. Always positive.
   LIBC_INLINE_VAR static constexpr int32_t EXP_BIAS =
       (1U << (EXP_BITS - 1U)) - 1U;
   static_assert(EXP_BIAS > 0);
 
-  // Shifts
+  // The shift amount to get the *significand* part to the least significant
+  // bit. Always `0` but kept for consistency.
   LIBC_INLINE_VAR static constexpr int SIG_MASK_SHIFT = 0;
+  // The shift amount to get the *exponent* part to the least significant bit.
   LIBC_INLINE_VAR static constexpr int EXP_MASK_SHIFT = SIG_BITS;
+  // The shift amount to get the *sign* part to the least significant bit.
   LIBC_INLINE_VAR static constexpr int SIGN_MASK_SHIFT = SIG_BITS + EXP_BITS;
 
-  // Masks
+  // The bit pattern that keeps only the *significand* part.
   LIBC_INLINE_VAR static constexpr UIntType SIG_MASK =
       mask_trailing_ones<UIntType, SIG_BITS>() << SIG_MASK_SHIFT;
+  // The bit pattern that keeps only the *exponent* part.
   LIBC_INLINE_VAR static constexpr UIntType EXP_MASK =
       mask_trailing_ones<UIntType, EXP_BITS>() << EXP_MASK_SHIFT;
 
 public:
+  // The bit pattern that keeps only the *sign* part.
   LIBC_INLINE_VAR static constexpr UIntType SIGN_MASK =
       mask_trailing_ones<UIntType, SIGN_BITS>() << SIGN_MASK_SHIFT;
-
-private:
+  // The bit pattern that keeps only the *sign + exponent + significand* part.
   LIBC_INLINE_VAR static constexpr UIntType FP_MASK =
       mask_trailing_ones<UIntType, TOTAL_BITS>();
+
   static_assert((SIG_MASK & EXP_MASK & SIGN_MASK) == 0, "masks disjoint");
   static_assert((SIG_MASK | EXP_MASK | SIGN_MASK) == FP_MASK, "masks cover");
 
+private:
   LIBC_INLINE static constexpr UIntType bit_at(int position) {
     return UIntType(1) << position;
   }
diff --git a/libc/src/__support/FPUtil/x86_64/LongDoubleBits.h b/libc/src/__support/FPUtil/x86_64/LongDoubleBits.h
index 60953313a695c5..f1ef928f230819 100644
--- a/libc/src/__support/FPUtil/x86_64/LongDoubleBits.h
+++ b/libc/src/__support/FPUtil/x86_64/LongDoubleBits.h
@@ -26,18 +26,6 @@
 namespace LIBC_NAMESPACE {
 namespace fputil {
 
-template <unsigned Width> struct Padding;
-
-// i386 padding.
-template <> struct Padding<4> {
-  static constexpr unsigned VALUE = 16;
-};
-
-// x86_64 padding.
-template <> struct Padding<8> {
-  static constexpr unsigned VALUE = 48;
-};
-
 template <> struct FPBits<long double> {
   using UIntType = UInt128;
 
@@ -129,11 +117,7 @@ template <> struct FPBits<long double> {
 
   LIBC_INLINE constexpr UIntType uintval() {
     // We zero the padding bits as they can contain garbage.
-    constexpr UIntType MASK =
-        (UIntType(1) << (sizeof(long double) * 8 -
-                         Padding<sizeof(uintptr_t)>::VALUE)) -
-        1;
-    return bits & MASK;
+    return bits & FloatProp::FP_MASK;
   }
 
   LIBC_INLINE constexpr long double get_val() const {
diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h
index 34c0c0ceef286d..be105830a91ac1 100644
--- a/libc/src/__support/float_to_string.h
+++ b/libc/src/__support/float_to_string.h
@@ -105,7 +105,7 @@ namespace LIBC_NAMESPACE {
 using BlockInt = uint32_t;
 constexpr uint32_t BLOCK_SIZE = 9;
 
-using MantissaInt = fputil::FPBits<long double>::UIntType;
+using FloatProp = fputil::FloatProperties<long double>;
 
 // Larger numbers prefer a slightly larger constant than is used for the smaller
 // numbers.
@@ -382,11 +382,10 @@ LIBC_INLINE uint32_t fast_uint_mod_1e9(const cpp::UInt<MID_INT_SIZE> &val) {
                                (1000000000 * shifted));
 }
 
-LIBC_INLINE uint32_t mul_shift_mod_1e9(const MantissaInt mantissa,
+LIBC_INLINE uint32_t mul_shift_mod_1e9(const FloatProp::UIntType mantissa,
                                        const cpp::UInt<MID_INT_SIZE> &large,
                                        const int32_t shift_amount) {
-  constexpr size_t MANT_INT_SIZE = sizeof(MantissaInt) * 8;
-  cpp::UInt<MID_INT_SIZE + MANT_INT_SIZE> val(large);
+  cpp::UInt<MID_INT_SIZE + FloatProp::UINTTYPE_BITS> val(large);
   val = (val * mantissa) >> shift_amount;
   return static_cast<uint32_t>(
       val.div_uint32_times_pow_2(1000000000, 0).value());
@@ -415,7 +414,7 @@ class FloatToString {
   fputil::FPBits<T> float_bits;
   bool is_negative;
   int exponent;
-  MantissaInt mantissa;
+  FloatProp::UIntType mantissa;
 
   static constexpr int MANT_WIDTH = fputil::MantissaWidth<T>::VALUE;
   static constexpr int EXP_BIAS = fputil::FPBits<T>::EXPONENT_BIAS;
diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h
index 79d35682d0b714..2a6f15c018f1ec 100644
--- a/libc/src/__support/str_to_float.h
+++ b/libc/src/__support/str_to_float.h
@@ -77,8 +77,6 @@ eisel_lemire(ExpandedFloat<T> init_num,
   UIntType mantissa = init_num.mantissa;
   int32_t exp10 = init_num.exponent;
 
-  constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8;
-
   if (sizeof(T) > 8) { // This algorithm cannot handle anything longer than a
                        // double, so we skip straight to the fallback.
     return cpp::nullopt;
@@ -94,8 +92,8 @@ eisel_lemire(ExpandedFloat<T> init_num,
   uint32_t clz = cpp::countl_zero<UIntType>(mantissa);
   mantissa <<= clz;
 
-  int32_t exp2 =
-      exp10_to_exp2(exp10) + BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz;
+  int32_t exp2 = exp10_to_exp2(exp10) + FloatProp::UINTTYPE_BITS +
+                 FloatProp::EXPONENT_BIAS - clz;
 
   // Multiplication
   const uint64_t *power_of_ten =
@@ -112,7 +110,9 @@ eisel_lemire(ExpandedFloat<T> init_num,
   // accuracy, and the most significant bit is ignored.) = 9 bits. Similarly,
   // it's 6 bits for floats in this case.
   const uint64_t halfway_constant =
-      (uint64_t(1) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3))) - 1;
+      (uint64_t(1) << (FloatProp::UINTTYPE_BITS -
+                       (FloatProp::MANTISSA_WIDTH + 3))) -
+      1;
   if ((high64(first_approx) & halfway_constant) == halfway_constant &&
       low64(first_approx) + mantissa < mantissa) {
     UInt128 low_bits =
@@ -131,11 +131,11 @@ eisel_lemire(ExpandedFloat<T> init_num,
   }
 
   // Shifting to 54 bits for doubles and 25 bits for floats
-  UIntType msb =
-      static_cast<UIntType>(high64(final_approx) >> (BITS_IN_MANTISSA - 1));
+  UIntType msb = static_cast<UIntType>(high64(final_approx) >>
+                                       (FloatProp::UINTTYPE_BITS - 1));
   UIntType final_mantissa = static_cast<UIntType>(
       high64(final_approx) >>
-      (msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3)));
+      (msb + FloatProp::UINTTYPE_BITS - (FloatProp::MANTISSA_WIDTH + 3)));
   exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb
 
   if (round == RoundDirection::Nearest) {
@@ -190,8 +190,6 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
   UIntType mantissa = init_num.mantissa;
   int32_t exp10 = init_num.exponent;
 
-  constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8;
-
   // Exp10 Range
   // This doesn't reach very far into the range for long doubles, since it's
   // sized for doubles and their 11 exponent bits, and not for long doubles and
@@ -211,8 +209,8 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
   uint32_t clz = cpp::countl_zero<UIntType>(mantissa);
   mantissa <<= clz;
 
-  int32_t exp2 =
-      exp10_to_exp2(exp10) + BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz;
+  int32_t exp2 = exp10_to_exp2(exp10) + FloatProp::UINTTYPE_BITS +
+                 FloatProp::EXPONENT_BIAS - clz;
 
   // Multiplication
   const uint64_t *power_of_ten =
@@ -249,7 +247,9 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
   // accuracy, and the most significant bit is ignored.) = 61 bits. Similarly,
   // it's 12 bits for 128 bit floats in this case.
   constexpr UInt128 HALFWAY_CONSTANT =
-      (UInt128(1) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3))) - 1;
+      (UInt128(1) << (FloatProp::UINTTYPE_BITS -
+                      (FloatProp::MANTISSA_WIDTH + 3))) -
+      1;
 
   if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT &&
       final_approx_lower + mantissa < mantissa) {
@@ -257,11 +257,11 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
   }
 
   // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats
-  uint32_t msb =
-      static_cast<uint32_t>(final_approx_upper >> (BITS_IN_MANTISSA - 1));
+  uint32_t msb = static_cast<uint32_t>(final_approx_upper >>
+                                       (FloatProp::UINTTYPE_BITS - 1));
   UIntType final_mantissa =
       final_approx_upper >>
-      (msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3));
+      (msb + FloatProp::UINTTYPE_BITS - (FloatProp::MANTISSA_WIDTH + 3));
   exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb
 
   if (round == RoundDirection::Nearest) {
@@ -622,9 +622,10 @@ template <> constexpr int32_t get_upper_bound<double>() { return 309; }
 // other out, and subnormal numbers allow for the result to be at the very low
 // end of the final mantissa.
 template <typename T> constexpr int32_t get_lower_bound() {
-  return -((fputil::FloatProperties<T>::EXPONENT_BIAS +
-            static_cast<int32_t>(fputil::FloatProperties<T>::MANTISSA_WIDTH +
-                                 (sizeof(T) * 8))) /
+  using FloatProp = typename fputil::FloatProperties<T>;
+  return -((FloatProp::EXPONENT_BIAS +
+            static_cast<int32_t>(FloatProp::MANTISSA_WIDTH +
+                                 FloatProp::UINTTYPE_BITS)) /
            3);
 }
 
@@ -733,7 +734,6 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
 
   // This is the number of leading zeroes a properly normalized float of type T
   // should have.
-  constexpr int32_t NUMBITS = sizeof(UIntType) * 8;
   constexpr int32_t INF_EXP = (1 << FloatProp::EXPONENT_WIDTH) - 1;
 
   // Normalization step 1: Bring the leading bit to the highest bit of UIntType.
@@ -743,8 +743,9 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
   // Keep exp2 representing the exponent of the lowest bit of UIntType.
   exp2 -= amount_to_shift_left;
 
-  // biasedExponent represents the biased exponent of the most significant bit.
-  int32_t biased_exponent = exp2 + NUMBITS + FPBits::EXPONENT_BIAS - 1;
+  // biased_exponent represents the biased exponent of the most significant bit.
+  int32_t biased_exponent =
+      exp2 + FloatProp::UINTTYPE_BITS + FPBits::EXPONENT_BIAS - 1;
 
   // Handle numbers that're too large and get squashed to inf
   if (biased_exponent >= INF_EXP) {
@@ -754,14 +755,15 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
     return output;
   }
 
-  uint32_t amount_to_shift_right = NUMBITS - FloatProp::MANTISSA_WIDTH - 1;
+  uint32_t amount_to_shift_right =
+      FloatProp::UINTTYPE_BITS - FloatProp::MANTISSA_WIDTH - 1;
 
   // Handle subnormals.
   if (biased_exponent <= 0) {
     amount_to_shift_right += 1 - biased_exponent;
     biased_exponent = 0;
 
-    if (amount_to_shift_right > NUMBITS) {
+    if (amount_to_shift_right > FloatProp::UINTTYPE_BITS) {
       // Return 0 if the exponent is too small.
       output.num = {0, 0};
       output.error = ERANGE;
@@ -774,7 +776,7 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
   bool round_bit = static_cast<bool>(mantissa & round_bit_mask);
   bool sticky_bit = static_cast<bool>(mantissa & sticky_mask) || truncated;
 
-  if (amount_to_shift_right < NUMBITS) {
+  if (amount_to_shift_right < FloatProp::UINTTYPE_BITS) {
     // Shift the mantissa and clear the implicit bit.
     mantissa >>= amount_to_shift_right;
     mantissa &= FloatProp::MANTISSA_MASK;

>From 875b2dd5e3da14b9b956d5bc96187d94c5dc9838 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Tue, 12 Dec 2023 14:46:31 +0000
Subject: [PATCH 2/2] Document UIntType as well

---
 libc/src/__support/FPUtil/FloatProperties.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libc/src/__support/FPUtil/FloatProperties.h b/libc/src/__support/FPUtil/FloatProperties.h
index b8ca6e5f2c8a8a..3f7dbdc5af3425 100644
--- a/libc/src/__support/FPUtil/FloatProperties.h
+++ b/libc/src/__support/FPUtil/FloatProperties.h
@@ -96,6 +96,8 @@ struct FPProperties : public internal::FPBaseProperties<fp_type> {
   static_assert(SIGN_BITS + EXP_BITS + SIG_BITS == TOTAL_BITS);
 
 public:
+  // An unsigned integer type that is wide enough to contain all of the
+  // floating point bits.
   using UIntType = typename UP::UIntType;
 
   // The number of bits in UIntType.