[libc-commits] [libc] [libc] Add conversions between `FPBits` and greater precision floating point representations (PR #80709)

Guillaume Chatelet via libc-commits libc-commits at lists.llvm.org
Wed Feb 7 03:21:44 PST 2024


https://github.com/gchatelet updated https://github.com/llvm/llvm-project/pull/80709

>From 5c7021657b1eeadcbac9c65df945909aadbab618 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Mon, 5 Feb 2024 16:46:27 +0000
Subject: [PATCH 1/2] [libc] Add conversions between `FPBits` and greater
 precision floating point representations

---
 libc/src/__support/FPUtil/FPBits.h            | 130 +++++++++
 .../test/src/__support/FPUtil/fpbits_test.cpp | 258 ++++++++++++++++++
 2 files changed, 388 insertions(+)

diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h
index 6665c90845683b..6ce3a1bb08ffa5 100644
--- a/libc/src/__support/FPUtil/FPBits.h
+++ b/libc/src/__support/FPUtil/FPBits.h
@@ -769,6 +769,136 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
     }
     return RetT(result.uintval());
   }
+
+  // 'Number' represents a finite (non-inf, non-NaN) floating point number. It
+  // is independent of the underlying encoding and allows for easy manipulation
+  // of sign, exponent and significand. This format's precision is larger than
+  // the encoded form. There is no notion of subnormal for a 'Number'.
+  struct Number {
+
+    // The number of extra bits for the significand compared to the encoded
+    // form.
+    LIBC_INLINE_VAR static constexpr int EXTRA_PRECISION =
+        UP::STORAGE_LEN - UP::FRACTION_LEN - 1;
+
+    Sign sign = Sign::POS;
+    int32_t exponent = 0;
+    StorageType significand = 0;
+
+    LIBC_INLINE constexpr bool is_zero() const { return significand == 0; }
+
+    // Moves the leading one of the significand to StorageType's MSB position
+    // and changes the exponent accordingly. This changes the internal
+    // representation to maximize the precision of the Number but it doesn't
+    // change its value.
+    LIBC_INLINE constexpr Number maximize_precision() const {
+      return get_scaled(-cpp::countl_zero(significand));
+    }
+
+    // Moves the trailing one of the significand to StorageType's LSB position
+    // and changes the exponent accordingly. This changes the internal
+    // representation to minimize the precision of the Number but it doesn't
+    // change its value.
+    LIBC_INLINE constexpr Number minimize_precision() const {
+      return get_scaled(cpp::countr_zero(significand));
+    }
+
+    // If non-zero, normalizes this number by moving the leading bit of the
+    // significand to StorageType's MSB position (maximize_precision). If zero
+    // also makes the exponent 0.
+    LIBC_INLINE constexpr Number normalize() const {
+      if (is_zero())
+        return {sign, 0, significand};
+      return maximize_precision();
+    }
+
+    // The rounding mode to use when materializing a Number (see below).
+    enum Rounding { TOWARDZERO, AWAYZERO, TONEAREST };
+
+    // The precision to use when materializing a Number (see below).
+    // - EXACT means this Number contains all the information,
+    // - TRUNCATED means that the significand was truncated.
+    enum Precision { TRUNCATED, EXACT };
+
+    // Creates a 'RetT' from the number representation.
+    // When this Number is too large to be represented 'infinity' is returned.
+    // When this Number is too small to be represented 'zero' or 'min_subnormal'
+    // is returned depending on the rounding mode.
+    LIBC_INLINE constexpr RetT materialize(Rounding rounding = TOWARDZERO,
+                                           Precision precision = EXACT) const {
+      if (exponent <= (INT32_MIN + UP::STORAGE_LEN))
+        return rounding == AWAYZERO ? RetT::min_subnormal(sign)
+                                    : RetT::zero(sign);
+      if (is_zero())
+        return precision == TRUNCATED && rounding == AWAYZERO
+                   ? RetT::min_subnormal(sign)
+                   : RetT::zero(sign);
+      if (exponent >= (INT32_MAX - UP::STORAGE_LEN))
+        return rounding == TOWARDZERO ? RetT::max_normal(sign)
+                                      : RetT::inf(sign);
+
+      const int leading_zeroes = cpp::countl_zero(significand);
+      const int extra_len = EXTRA_PRECISION - leading_zeroes;
+      // 'extra_len' is smaller than 'STORAGE_LEN' by definition.
+      static_assert(EXTRA_PRECISION < UP::STORAGE_LEN);
+      const StorageType extra_bits_mask =
+          extra_len <= 0 ? StorageType(0)
+                         : (StorageType(1) << extra_len) - StorageType(1);
+      const StorageType extra_bits = significand & extra_bits_mask;
+      const StorageType extra_bits_midpoint = extra_bits_mask >> 1;
+      const bool round_toward_inf =
+          (rounding == AWAYZERO && (extra_bits || precision == TRUNCATED)) ||
+          (rounding == TONEAREST &&
+           ((extra_bits > extra_bits_midpoint) ||
+            ((extra_bits == extra_bits_midpoint) && (precision == TRUNCATED))));
+      int32_t rep_exponent = exponent - leading_zeroes;
+      constexpr int32_t EXP_MIN = (int32_t)Exponent::MIN();
+      constexpr int32_t EXP_SUBNORMAL = (int32_t)Exponent::SUBNORMAL();
+
+      int lshift = leading_zeroes - EXTRA_PRECISION;
+      if (rep_exponent < EXP_MIN) {
+        lshift -= EXP_MIN - rep_exponent;
+        rep_exponent = EXP_SUBNORMAL;
+      }
+
+      StorageType rep_significand = significand;
+      if (lshift > 0)
+        rep_significand <<= lshift;
+      else if (lshift < 0)
+        rep_significand >>= -lshift;
+
+      const RetT rep(
+          encode(sign, Exponent(rep_exponent), Significand(rep_significand)));
+
+      return round_toward_inf ? rep.next_toward_inf() : rep;
+    }
+
+  private:
+    // This operation changes the scale of the Number by offsetting the exponent
+    // and shifting the significand.
+    LIBC_INLINE constexpr Number get_scaled(int offset) const {
+      if (offset == 0)
+        return *this;
+      Number num;
+      num.sign = sign;
+      num.exponent = exponent + offset;
+      num.significand = offset == 0 ? significand
+                                    : (offset > 0 ? significand >> offset
+                                                  : significand << -offset);
+      return num;
+    }
+  };
+
+  // Returns a 'Number' representation of the number, the returned number
+  // may or may not be normalized (leading bit of the significant at MSB
+  // position). Only valid to call when is_finite().
+  LIBC_INLINE constexpr Number get_number() const {
+    Number num;
+    num.sign = sign();
+    num.exponent = get_explicit_exponent() + Number::EXTRA_PRECISION;
+    num.significand = get_explicit_mantissa();
+    return num;
+  }
 };
 
 // A generic class to manipulate floating point formats.
diff --git a/libc/test/src/__support/FPUtil/fpbits_test.cpp b/libc/test/src/__support/FPUtil/fpbits_test.cpp
index 4504a4f0cfcc7d..2305eed3866640 100644
--- a/libc/test/src/__support/FPUtil/fpbits_test.cpp
+++ b/libc/test/src/__support/FPUtil/fpbits_test.cpp
@@ -327,6 +327,264 @@ TYPED_TEST(LlvmLibcFPBitsTest, NextTowardInf, FPTypes) {
   }
 }
 
+TYPED_TEST(LlvmLibcFPBitsTest, NumberConstruction, FPTypes) {
+  using LIBC_NAMESPACE::cpp::countl_zero;
+  using LIBC_NAMESPACE::cpp::countr_zero;
+  using Number = typename T::Number;
+
+  // When using get_number() the significand is transferred as-is and the
+  // exponent is adjusted to reflect the extra precision (now the significand
+  // uses (STORAGE_LEN - 1) bits instead of FRACTION_LEN bits).
+
+  // e.g., with IEEE754_Binary16
+  // 1.0 in IEEE754_Binary16 : 0b0011110000000000
+  //                             SEEEEEMMMMMMMMMM
+  // number's significand    : 0b0000010000000000
+  // EXTRA_PRECISION         :   ^^^^^
+  // number's exponent       : EXTRA_PRECISION
+
+  const T one = T::one();
+
+  const Number num = one.get_number();
+
+  // "num" and "one" have the same sign.
+  ASSERT_EQ(num.sign.is_pos(), one.is_pos());
+
+  // For 'one', the leading one of the significand is at position FRACTION_LEN.
+  // So we have FRACTION_LEN zeroes after it.
+  ASSERT_EQ(countr_zero(num.significand), T::FRACTION_LEN);
+
+  // The exponent is increased by EXTRA_PRECISION.
+  // Since the exponent for 'one' is '0' the number's exponent is just
+  // EXTRA_PRECISION.
+  ASSERT_EQ(num.exponent, Number::EXTRA_PRECISION);
+
+  // Because the significand is now stored in 'StorageType' we have extra
+  // precision bits available at the left of the leading one.
+  ASSERT_GT(Number::EXTRA_PRECISION, 0);
+  ASSERT_EQ(countl_zero(num.significand), Number::EXTRA_PRECISION);
+
+  // In maximized precision form, the leading one is moved to StorageType's MSB.
+  // number's significand    : 0b1000000000000000
+  // number's exponent       : 0
+  const Number max_precision = one.get_number().maximize_precision();
+  ASSERT_TRUE(max_precision.sign.is_pos());
+  // The leading bit is now in the MSB of the storage.
+  ASSERT_EQ(countl_zero(max_precision.significand), 0);
+  ASSERT_EQ(max_precision.exponent, 0);
+
+  // In minimized precision form, the leading one is moved to StorageType's LSB.
+  // number's significand    : 0b0000000000000001
+  // number's exponent       : FRACTION_LEN + EXTRA_PRECISION
+  const Number min_precision = one.get_number().minimize_precision();
+  ASSERT_TRUE(min_precision.sign.is_pos());
+  // The leading bit is now in the LSB of the storage.
+  ASSERT_EQ(countr_zero(min_precision.significand), 0);
+  ASSERT_EQ(min_precision.exponent, T::FRACTION_LEN + Number::EXTRA_PRECISION);
+}
+
+#define ASSERT_MATERIALIZE_AS(NUMBER, ROUNDING, PRECISION, REP)                \
+  ASSERT_SAME_REP(NUMBER.materialize(ROUNDING, PRECISION), REP)
+
+// For all 'FPType' and all finite 'FP' values, we check that we can convert the
+// 'FPRep' to a 'Number' and back to the original 'FPRep' without loss.
+// We also check that changing the scale of the intermediary 'Number' has no
+// effect.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberBackAndForth, FPTypes) {
+  // using StorageType = typename T::StorageType;
+  using Number = typename T::Number;
+  for (Sign sign : all_signs) {
+    for (FP fp : all_fp_values) {
+      const T rep = make<T>(sign, fp);
+      if (!rep.is_finite())
+        continue;
+      // We test numbers at different scales.
+      // Note: changing scale changes the internal representation but not the
+      // Number's value.
+      const Number scaled_numbers[] = {
+          rep.get_number(),
+          rep.get_number().maximize_precision(),
+          rep.get_number().minimize_precision(),
+      };
+      for (const Number &num : scaled_numbers) {
+        // When numbers are exact (i.e., not truncated) they should materialize
+        // back exactly whatever the rounding mode.
+        ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+        ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rep);
+        ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
+      }
+    }
+  }
+}
+
+// Here we test materialization of a 'Number' back to an 'FPRep' with the
+// 'TOWARDZERO' rounding mode. This rounding mode corresponds to C++ cast
+// semantics and simply discards the extra precision.
+// That is, whatever the values of the extra bits, 'Number' will materialize
+// back as 'FPRep' exactly.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundTowardZero, FPTypes) {
+  using StorageType = typename T::StorageType;
+  using Number = typename T::Number;
+  static constexpr StorageType EXTRA_BITS_MASK =
+      LIBC_NAMESPACE::mask_trailing_ones<StorageType,
+                                         Number::EXTRA_PRECISION>();
+  for (Sign sign : all_signs) {
+    for (FP fp : all_fp_values) {
+      const T rep = make<T>(sign, fp);
+      if (!rep.is_finite())
+        continue;
+      // Number with EXTRA_PRECISION bits.
+      Number num = rep.get_number().maximize_precision();
+
+      // Exact number converts back to rep.
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+      // Non-exact numbers convert back to rep.
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
+
+      if (rep.is_zero())
+        continue; // extra bits are only present for non-zero numbers.
+
+      ++num.significand; // Smallest extra bits value.
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
+      num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
+    }
+  }
+}
+
+// Here we test materialization of a 'Number' back to an 'FPRep' with the
+// 'AWAYZERO' rounding mode. This rounding mode will convert back to 'FPRep'
+// only if there is no extra bit set and Precision is 'EXACT', otherwise it
+// will materialize as the next representable number.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundAwayZero, FPTypes) {
+  using StorageType = typename T::StorageType;
+  using Number = typename T::Number;
+  static constexpr StorageType EXTRA_BITS_MASK =
+      LIBC_NAMESPACE::mask_trailing_ones<StorageType,
+                                         Number::EXTRA_PRECISION>();
+  const struct {
+    FP initial;
+    FP rounded;
+  } TESTS[] = {
+      {FP::ZERO, FP::MIN_SUBNORMAL},          //
+      {FP::MAX_SUBNORMAL, FP::MIN_NORMAL},    //
+      {FP::MAX_NORMAL, FP::INF},              //
+      {FP::INF, FP::INF},                     //
+      {FP::QUIET_NAN, FP::QUIET_NAN},         //
+      {FP::SIGNALING_NAN, FP::SIGNALING_NAN}, //
+  };
+  for (Sign sign : all_signs) {
+    for (auto tc : TESTS) {
+      const T rep = make<T>(sign, tc.initial);
+      const T rounded = make<T>(sign, tc.rounded);
+      Number num = rep.get_number().maximize_precision();
+
+      // Exact number converts back to rep.
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rep);
+      // Non-exact numbers get rounded toward infinity.
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
+
+      if (rep.is_zero())
+        continue; // extra bits are only present for non-zero numbers.
+
+      ++num.significand; // Smallest extra bits value.
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rounded);
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
+      num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rounded);
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
+    }
+  }
+}
+
+// Here we test materialization of a 'Number' back to an 'FPRep' with the
+// 'TONEAREST' rounding mode. This rounding mode will convert back to 'FPRep'
+// when the extra bits leave the 'Number' closer to it, otherwise it will
+// materialize as the next representable number.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundToNearest, FPTypes) {
+  using StorageType = typename T::StorageType;
+  using Number = typename T::Number;
+  static constexpr StorageType EXTRA_BITS_MASK =
+      LIBC_NAMESPACE::mask_trailing_ones<StorageType,
+                                         Number::EXTRA_PRECISION>();
+  const struct {
+    FP initial;
+    FP rounded;
+  } TESTS[] = {
+      {FP::ZERO, FP::MIN_SUBNORMAL},          //
+      {FP::MAX_SUBNORMAL, FP::MIN_NORMAL},    //
+      {FP::MAX_NORMAL, FP::INF},              //
+      {FP::INF, FP::INF},                     //
+      {FP::QUIET_NAN, FP::QUIET_NAN},         //
+      {FP::SIGNALING_NAN, FP::SIGNALING_NAN}, //
+  };
+  for (Sign sign : all_signs) {
+    for (auto tc : TESTS) {
+      const T rep = make<T>(sign, tc.initial);
+      const T rounded = make<T>(sign, tc.rounded);
+      Number num = rep.get_number().maximize_precision();
+
+      // Exact number converts back to rep.
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
+      // Non-exact numbers convert back to rep.
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rep);
+
+      if (rep.is_zero())
+        continue; // extra bits are only present for non-zero numbers.
+
+      ++num.significand; // Smallest extra bits value.
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rep);
+      num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rounded);
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rounded);
+    }
+  }
+}
+
+// We test the materialization of
+TYPED_TEST(LlvmLibcFPBitsTest, SmallestNumber, FPTypes) {
+  using StorageType = typename T::StorageType;
+  using Number = typename T::Number;
+  for (Sign sign : all_signs) {
+    Number num;
+    num.sign = sign;
+    num.exponent = INT32_MIN;
+    num.significand = StorageType(1);
+
+    const T zero = make<T>(sign, FP::ZERO);
+    const T min = make<T>(sign, FP::MIN_SUBNORMAL);
+    ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, zero);
+    ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, zero);
+    ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, min);
+    ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, min);
+    ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, zero);
+    ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, zero);
+  }
+}
+
+TYPED_TEST(LlvmLibcFPBitsTest, LargestNumber, FPTypes) {
+  using StorageType = typename T::StorageType;
+  using Number = typename T::Number;
+  for (Sign sign : all_signs) {
+    Number num;
+    num.sign = sign;
+    num.exponent = INT32_MAX;
+    num.significand = ~StorageType(0);
+
+    const T inf = make<T>(sign, FP::INF);
+    const T max = make<T>(sign, FP::MAX_NORMAL);
+    ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, max);
+    ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, max);
+    ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, inf);
+    ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, inf);
+    ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, inf);
+    ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, inf);
+  }
+}
+
 TEST(LlvmLibcFPBitsTest, FloatType) {
   using FloatBits = FPBits<float>;
 

>From 08e6c466be5fcc1a2330804b44fbf69796ac9fd2 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Wed, 7 Feb 2024 11:21:28 +0000
Subject: [PATCH 2/2] Add more tests, fix rounding bugs

---
 libc/src/__support/FPUtil/FPBits.h            |  87 +++++++---
 .../test/src/__support/FPUtil/fpbits_test.cpp | 149 +++++++++++-------
 2 files changed, 154 insertions(+), 82 deletions(-)

diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h
index 6ce3a1bb08ffa5..d83db080c97e18 100644
--- a/libc/src/__support/FPUtil/FPBits.h
+++ b/libc/src/__support/FPUtil/FPBits.h
@@ -20,6 +20,12 @@
 
 #include <stdint.h>
 
+#include <stdio.h> // DO NOT SUBMIT
+#define eprintf(...)                                                           \
+  if constexpr (sizeof(StorageType) == 0) {                                    \
+    ::fprintf(::stderr, __VA_ARGS__);                                          \
+  }
+
 namespace LIBC_NAMESPACE {
 namespace fputil {
 
@@ -821,55 +827,89 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
     enum Precision { TRUNCATED, EXACT };
 
     // Creates a 'RetT' from the number representation.
-    // When this Number is too large to be represented 'infinity' is returned.
-    // When this Number is too small to be represented 'zero' or 'min_subnormal'
-    // is returned depending on the rounding mode.
+    //  - When this 'Number' is too large to be represented 'infinity' or
+    //    'max_normal' is returned depending on the rounding mode.
+    //  - When this 'Number' is too small to be represented 'zero' or
+    //    'min_subnormal' is returned depending on the rounding mode.
     LIBC_INLINE constexpr RetT materialize(Rounding rounding = TOWARDZERO,
                                            Precision precision = EXACT) const {
-      if (exponent <= (INT32_MIN + UP::STORAGE_LEN))
-        return rounding == AWAYZERO ? RetT::min_subnormal(sign)
-                                    : RetT::zero(sign);
       if (is_zero())
         return precision == TRUNCATED && rounding == AWAYZERO
                    ? RetT::min_subnormal(sign)
                    : RetT::zero(sign);
-      if (exponent >= (INT32_MAX - UP::STORAGE_LEN))
+
+      const auto underflow = [&]() -> RetT {
+        return rounding == AWAYZERO ? RetT::min_subnormal(sign)
+                                    : RetT::zero(sign);
+      };
+      const auto overflow = [&]() -> RetT {
         return rounding == TOWARDZERO ? RetT::max_normal(sign)
                                       : RetT::inf(sign);
+      };
 
       const int leading_zeroes = cpp::countl_zero(significand);
-      const int extra_len = EXTRA_PRECISION - leading_zeroes;
-      // 'extra_len' is smaller than 'STORAGE_LEN' by definition.
-      static_assert(EXTRA_PRECISION < UP::STORAGE_LEN);
-      const StorageType extra_bits_mask =
-          extra_len <= 0 ? StorageType(0)
-                         : (StorageType(1) << extra_len) - StorageType(1);
-      const StorageType extra_bits = significand & extra_bits_mask;
-      const StorageType extra_bits_midpoint = extra_bits_mask >> 1;
-      const bool round_toward_inf =
-          (rounding == AWAYZERO && (extra_bits || precision == TRUNCATED)) ||
-          (rounding == TONEAREST &&
-           ((extra_bits > extra_bits_midpoint) ||
-            ((extra_bits == extra_bits_midpoint) && (precision == TRUNCATED))));
+      LIBC_ASSERT(leading_zeroes <= UP::STORAGE_LEN);
+      // If 'exponent' is too small 'exponent - leading_zeroes' below can
+      // overflow which is undefined behavior for signed integers. If exponent
+      // is too close to INT32_MIN we bail out and return the appropriate
+      // underflow value.
+      constexpr int32_t smallest_exponent = INT32_MIN + UP::STORAGE_LEN;
+      if (exponent <= smallest_exponent)
+        return underflow();
+
+      // The exponent when the leading bit is at its final position.
       int32_t rep_exponent = exponent - leading_zeroes;
-      constexpr int32_t EXP_MIN = (int32_t)Exponent::MIN();
-      constexpr int32_t EXP_SUBNORMAL = (int32_t)Exponent::SUBNORMAL();
+
+      constexpr int32_t EXP_MAX(Exponent::MAX());
+      constexpr int32_t EXP_MIN(Exponent::MIN());
+      constexpr int32_t EXP_SUBNORMAL(Exponent::SUBNORMAL());
 
       int lshift = leading_zeroes - EXTRA_PRECISION;
+
+      // Adjust shift and exponent when the number is subnormal.
       if (rep_exponent < EXP_MIN) {
         lshift -= EXP_MIN - rep_exponent;
         rep_exponent = EXP_SUBNORMAL;
       }
 
+      // The final significand shifted accordingly.
       StorageType rep_significand = significand;
       if (lshift > 0)
         rep_significand <<= lshift;
       else if (lshift < 0)
         rep_significand >>= -lshift;
 
+      // The number of extra precision bits we have in 'significand'.
+      const int extra_len = -lshift;
+
+      if (extra_len > UP::STORAGE_LEN)
+        return underflow();
+
+      if (rep_exponent > EXP_MAX)
+        return overflow();
+
+      // When rounding is AWAYZERO or TONEAREST we need to consider extra
+      // precision bits.
+      LIBC_ASSERT(extra_len <= UP::STORAGE_LEN);
+      const bool has_extra_len = extra_len > 0;
+      StorageType extra_bits_mask{};
+      StorageType extra_bits_midpoint{};
+      if (has_extra_len) {
+        if (extra_len == UP::STORAGE_LEN)
+          extra_bits_mask = StorageType(~(StorageType(0))); // subnormals
+        else
+          extra_bits_mask = (StorageType(1) << extra_len) - StorageType(1);
+        extra_bits_midpoint = (extra_bits_mask >> 1) + StorageType(1);
+      }
+      const StorageType extra_bits = significand & extra_bits_mask;
+      const bool round_toward_inf =
+          (rounding == AWAYZERO &&
+           ((extra_bits > 0) || (precision == TRUNCATED))) ||
+          (rounding == TONEAREST &&
+           ((extra_bits > extra_bits_midpoint) ||
+            ((extra_bits == extra_bits_midpoint) && (precision == TRUNCATED))));
       const RetT rep(
           encode(sign, Exponent(rep_exponent), Significand(rep_significand)));
-
       return round_toward_inf ? rep.next_toward_inf() : rep;
     }
 
@@ -893,6 +933,7 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
   // may or may not be normalized (leading bit of the significant at MSB
   // position). Only valid to call when is_finite().
   LIBC_INLINE constexpr Number get_number() const {
+    LIBC_ASSERT(is_finite());
     Number num;
     num.sign = sign();
     num.exponent = get_explicit_exponent() + Number::EXTRA_PRECISION;
diff --git a/libc/test/src/__support/FPUtil/fpbits_test.cpp b/libc/test/src/__support/FPUtil/fpbits_test.cpp
index 2305eed3866640..22f930720fbd8d 100644
--- a/libc/test/src/__support/FPUtil/fpbits_test.cpp
+++ b/libc/test/src/__support/FPUtil/fpbits_test.cpp
@@ -239,10 +239,13 @@ constexpr FP all_fp_values[] = {
 
 constexpr Sign all_signs[] = {Sign::POS, Sign::NEG};
 
-using FPTypes = LIBC_NAMESPACE::testing::TypeList<
-    FPRep<FPType::IEEE754_Binary16>, FPRep<FPType::IEEE754_Binary32>,
-    FPRep<FPType::IEEE754_Binary64>, FPRep<FPType::IEEE754_Binary128>,
-    FPRep<FPType::X86_Binary80>>;
+using FPTypes =
+    LIBC_NAMESPACE::testing::TypeList<FPRep<FPType::IEEE754_Binary16>,  //
+                                      FPRep<FPType::IEEE754_Binary32>,  //
+                                      FPRep<FPType::IEEE754_Binary64>,  //
+                                      FPRep<FPType::IEEE754_Binary128>, //
+                                      FPRep<FPType::X86_Binary80>       //
+                                      >;
 
 template <typename T> constexpr auto make(Sign sign, FP fp) {
   switch (fp) {
@@ -425,15 +428,15 @@ TYPED_TEST(LlvmLibcFPBitsTest, NumberBackAndForth, FPTypes) {
 TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundTowardZero, FPTypes) {
   using StorageType = typename T::StorageType;
   using Number = typename T::Number;
-  static constexpr StorageType EXTRA_BITS_MASK =
-      LIBC_NAMESPACE::mask_trailing_ones<StorageType,
-                                         Number::EXTRA_PRECISION>();
+  constexpr auto set_last_bits = [](StorageType value, int bits) {
+    return value | ((StorageType(1) << bits) - StorageType(1));
+  };
   for (Sign sign : all_signs) {
     for (FP fp : all_fp_values) {
       const T rep = make<T>(sign, fp);
       if (!rep.is_finite())
         continue;
-      // Number with EXTRA_PRECISION bits.
+      // Number with extra precision bits.
       Number num = rep.get_number().maximize_precision();
 
       // Exact number converts back to rep.
@@ -444,10 +447,14 @@ TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundTowardZero, FPTypes) {
       if (rep.is_zero())
         continue; // extra bits are only present for non-zero numbers.
 
-      ++num.significand; // Smallest extra bits value.
+      const auto sig = num.significand;
+      num.significand = set_last_bits(sig, 1); // Smallest extra bits value.
       ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
       ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
-      num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+      if (rep.is_subnormal()) // Largest extra bits value.
+        num.significand = set_last_bits(sig, Number::EXTRA_PRECISION + 1);
+      else
+        num.significand = set_last_bits(sig, Number::EXTRA_PRECISION);
       ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
       ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
     }
@@ -461,24 +468,22 @@ TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundTowardZero, FPTypes) {
 TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundAwayZero, FPTypes) {
   using StorageType = typename T::StorageType;
   using Number = typename T::Number;
-  static constexpr StorageType EXTRA_BITS_MASK =
-      LIBC_NAMESPACE::mask_trailing_ones<StorageType,
-                                         Number::EXTRA_PRECISION>();
+  constexpr auto set_last_bits = [](StorageType value, int bits) {
+    return value | ((StorageType(1) << bits) - StorageType(1));
+  };
   const struct {
     FP initial;
     FP rounded;
   } TESTS[] = {
-      {FP::ZERO, FP::MIN_SUBNORMAL},          //
-      {FP::MAX_SUBNORMAL, FP::MIN_NORMAL},    //
-      {FP::MAX_NORMAL, FP::INF},              //
-      {FP::INF, FP::INF},                     //
-      {FP::QUIET_NAN, FP::QUIET_NAN},         //
-      {FP::SIGNALING_NAN, FP::SIGNALING_NAN}, //
+      {FP::ZERO, FP::MIN_SUBNORMAL},       //
+      {FP::MAX_SUBNORMAL, FP::MIN_NORMAL}, //
+      {FP::MAX_NORMAL, FP::INF},           //
   };
   for (Sign sign : all_signs) {
     for (auto tc : TESTS) {
       const T rep = make<T>(sign, tc.initial);
       const T rounded = make<T>(sign, tc.rounded);
+      // Number with extra precision bits.
       Number num = rep.get_number().maximize_precision();
 
       // Exact number converts back to rep.
@@ -489,10 +494,14 @@ TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundAwayZero, FPTypes) {
       if (rep.is_zero())
         continue; // extra bits are only present for non-zero numbers.
 
-      ++num.significand; // Smallest extra bits value.
+      const auto sig = num.significand;
+      num.significand = set_last_bits(sig, 1); // Smallest extra bits value.
       ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rounded);
       ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
-      num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+      if (rep.is_subnormal()) // Largest extra bits value.
+        num.significand = set_last_bits(sig, Number::EXTRA_PRECISION + 1);
+      else
+        num.significand = set_last_bits(sig, Number::EXTRA_PRECISION);
       ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rounded);
       ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
     }
@@ -506,19 +515,19 @@ TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundAwayZero, FPTypes) {
 TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundToNearest, FPTypes) {
   using StorageType = typename T::StorageType;
   using Number = typename T::Number;
-  static constexpr StorageType EXTRA_BITS_MASK =
-      LIBC_NAMESPACE::mask_trailing_ones<StorageType,
-                                         Number::EXTRA_PRECISION>();
+  constexpr auto set_last_bits = [](StorageType value, int bits) {
+    return value | ((StorageType(1) << bits) - StorageType(1));
+  };
+  constexpr auto set_bit_at = [](StorageType value, int pos) {
+    return value | (StorageType(1) << (pos - 1));
+  };
   const struct {
     FP initial;
     FP rounded;
   } TESTS[] = {
-      {FP::ZERO, FP::MIN_SUBNORMAL},          //
-      {FP::MAX_SUBNORMAL, FP::MIN_NORMAL},    //
-      {FP::MAX_NORMAL, FP::INF},              //
-      {FP::INF, FP::INF},                     //
-      {FP::QUIET_NAN, FP::QUIET_NAN},         //
-      {FP::SIGNALING_NAN, FP::SIGNALING_NAN}, //
+      {FP::ZERO, FP::MIN_SUBNORMAL},       //
+      {FP::MAX_SUBNORMAL, FP::MIN_NORMAL}, //
+      {FP::MAX_NORMAL, FP::INF},           //
   };
   for (Sign sign : all_signs) {
     for (auto tc : TESTS) {
@@ -534,54 +543,76 @@ TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundToNearest, FPTypes) {
       if (rep.is_zero())
         continue; // extra bits are only present for non-zero numbers.
 
-      ++num.significand; // Smallest extra bits value.
+      const auto sig = num.significand;
+      num.significand = set_last_bits(sig, 1); // Smallest extra bits value.
       ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
       ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rep);
-      num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+      if (rep.is_subnormal()) // Largest extra bits value.
+        num.significand = set_last_bits(sig, Number::EXTRA_PRECISION + 1);
+      else
+        num.significand = set_last_bits(sig, Number::EXTRA_PRECISION);
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rounded);
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rounded);
+      if (rep.is_subnormal()) // Half extra bits value.
+        num.significand = set_bit_at(sig, Number::EXTRA_PRECISION + 1);
+      else
+        num.significand = set_bit_at(sig, Number::EXTRA_PRECISION);
+      // We're exactly half-way between two numbers.
+      // If exact we round toward zero.
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
+      // If truncated, we round away from zero.
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rounded);
+      // The next value will always round away from zero.
+      ++num.significand;
       ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rounded);
       ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rounded);
     }
   }
 }
 
-// We test the materialization of
 TYPED_TEST(LlvmLibcFPBitsTest, SmallestNumber, FPTypes) {
   using StorageType = typename T::StorageType;
   using Number = typename T::Number;
+  constexpr int32_t exponents[] = {INT32_MIN, INT32_MIN / 2};
   for (Sign sign : all_signs) {
-    Number num;
-    num.sign = sign;
-    num.exponent = INT32_MIN;
-    num.significand = StorageType(1);
-
-    const T zero = make<T>(sign, FP::ZERO);
-    const T min = make<T>(sign, FP::MIN_SUBNORMAL);
-    ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, zero);
-    ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, zero);
-    ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, min);
-    ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, min);
-    ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, zero);
-    ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, zero);
+    for (int32_t exponent : exponents) {
+      Number num;
+      num.sign = sign;
+      num.exponent = exponent;
+      num.significand = StorageType(1);
+
+      const T zero = make<T>(sign, FP::ZERO);
+      const T min = make<T>(sign, FP::MIN_SUBNORMAL);
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, zero);
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, zero);
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, min);
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, min);
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, zero);
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, zero);
+    }
   }
 }
 
 TYPED_TEST(LlvmLibcFPBitsTest, LargestNumber, FPTypes) {
   using StorageType = typename T::StorageType;
   using Number = typename T::Number;
+  constexpr int32_t exponents[] = {INT32_MAX, INT32_MAX / 2};
   for (Sign sign : all_signs) {
-    Number num;
-    num.sign = sign;
-    num.exponent = INT32_MAX;
-    num.significand = ~StorageType(0);
-
-    const T inf = make<T>(sign, FP::INF);
-    const T max = make<T>(sign, FP::MAX_NORMAL);
-    ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, max);
-    ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, max);
-    ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, inf);
-    ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, inf);
-    ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, inf);
-    ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, inf);
+    for (int32_t exponent : exponents) {
+      Number num;
+      num.sign = sign;
+      num.exponent = exponent;
+      num.significand = ~StorageType(0);
+
+      const T max = make<T>(sign, FP::MAX_NORMAL);
+      const T inf = make<T>(sign, FP::INF);
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, max);
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, max);
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, inf);
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, inf);
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, inf);
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, inf);
+    }
   }
 }
 



More information about the libc-commits mailing list