[libc-commits] [libc] [libc] Add conversions between `FPBits` and greater precision floating point representations (PR #80709)
Guillaume Chatelet via libc-commits
libc-commits at lists.llvm.org
Wed Feb 7 03:21:44 PST 2024
https://github.com/gchatelet updated https://github.com/llvm/llvm-project/pull/80709
>From 5c7021657b1eeadcbac9c65df945909aadbab618 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Mon, 5 Feb 2024 16:46:27 +0000
Subject: [PATCH 1/2] [libc] Add conversions between `FPBits` and greater
precision floating point representations
---
libc/src/__support/FPUtil/FPBits.h | 130 +++++++++
.../test/src/__support/FPUtil/fpbits_test.cpp | 258 ++++++++++++++++++
2 files changed, 388 insertions(+)
diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h
index 6665c90845683b..6ce3a1bb08ffa5 100644
--- a/libc/src/__support/FPUtil/FPBits.h
+++ b/libc/src/__support/FPUtil/FPBits.h
@@ -769,6 +769,136 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
}
return RetT(result.uintval());
}
+
+ // 'Number' represents a finite (non-inf, non-NaN) floating point number. It
+ // is independent of the underlying encoding and allows for easy manipulation
+ // of sign, exponent and significand. This format's precision is larger than
+ // the encoded form. There is no notion of subnormal for a 'Number'.
+ struct Number {
+
+ // The number of extra bits for the significand compared to the encoded
+ // form.
+ LIBC_INLINE_VAR static constexpr int EXTRA_PRECISION =
+ UP::STORAGE_LEN - UP::FRACTION_LEN - 1;
+
+ Sign sign = Sign::POS;
+ int32_t exponent = 0;
+ StorageType significand = 0;
+
+ LIBC_INLINE constexpr bool is_zero() const { return significand == 0; }
+
+ // Moves the leading one of the significand to StorageType's MSB position
+ // and changes the exponent accordingly. This changes the internal
+ // representation to maximize the precision of the Number but it doesn't
+ // change its value.
+ LIBC_INLINE constexpr Number maximize_precision() const {
+ return get_scaled(-cpp::countl_zero(significand));
+ }
+
+ // Moves the trailing one of the significand to StorageType's LSB position
+ // and changes the exponent accordingly. This changes the internal
+ // representation to minimize the precision of the Number but it doesn't
+ // change its value.
+ LIBC_INLINE constexpr Number minimize_precision() const {
+ return get_scaled(cpp::countr_zero(significand));
+ }
+
+ // If non-zero, normalizes this number by moving the leading bit of the
+ // significand to StorageType's MSB position (maximize_precision). If zero
+ // also makes the exponent 0.
+ LIBC_INLINE constexpr Number normalize() const {
+ if (is_zero())
+ return {sign, 0, significand};
+ return maximize_precision();
+ }
+
+ // The rounding mode to use when materializing a Number (see below).
+ enum Rounding { TOWARDZERO, AWAYZERO, TONEAREST };
+
+ // The precision to use when materializing a Number (see below).
+ // - EXACT means this Number contains all the information,
+ // - TRUNCATED means that the significand was truncated.
+ enum Precision { TRUNCATED, EXACT };
+
+ // Creates a 'RetT' from the number representation.
+ // When this Number is too large to be represented 'infinity' is returned.
+ // When this Number is too small to be represented 'zero' or 'min_subnormal'
+ // is returned depending on the rounding mode.
+ LIBC_INLINE constexpr RetT materialize(Rounding rounding = TOWARDZERO,
+ Precision precision = EXACT) const {
+ if (exponent <= (INT32_MIN + UP::STORAGE_LEN))
+ return rounding == AWAYZERO ? RetT::min_subnormal(sign)
+ : RetT::zero(sign);
+ if (is_zero())
+ return precision == TRUNCATED && rounding == AWAYZERO
+ ? RetT::min_subnormal(sign)
+ : RetT::zero(sign);
+ if (exponent >= (INT32_MAX - UP::STORAGE_LEN))
+ return rounding == TOWARDZERO ? RetT::max_normal(sign)
+ : RetT::inf(sign);
+
+ const int leading_zeroes = cpp::countl_zero(significand);
+ const int extra_len = EXTRA_PRECISION - leading_zeroes;
+ // 'extra_len' is smaller than 'STORAGE_LEN' by definition.
+ static_assert(EXTRA_PRECISION < UP::STORAGE_LEN);
+ const StorageType extra_bits_mask =
+ extra_len <= 0 ? StorageType(0)
+ : (StorageType(1) << extra_len) - StorageType(1);
+ const StorageType extra_bits = significand & extra_bits_mask;
+ const StorageType extra_bits_midpoint = extra_bits_mask >> 1;
+ const bool round_toward_inf =
+ (rounding == AWAYZERO && (extra_bits || precision == TRUNCATED)) ||
+ (rounding == TONEAREST &&
+ ((extra_bits > extra_bits_midpoint) ||
+ ((extra_bits == extra_bits_midpoint) && (precision == TRUNCATED))));
+ int32_t rep_exponent = exponent - leading_zeroes;
+ constexpr int32_t EXP_MIN = (int32_t)Exponent::MIN();
+ constexpr int32_t EXP_SUBNORMAL = (int32_t)Exponent::SUBNORMAL();
+
+ int lshift = leading_zeroes - EXTRA_PRECISION;
+ if (rep_exponent < EXP_MIN) {
+ lshift -= EXP_MIN - rep_exponent;
+ rep_exponent = EXP_SUBNORMAL;
+ }
+
+ StorageType rep_significand = significand;
+ if (lshift > 0)
+ rep_significand <<= lshift;
+ else if (lshift < 0)
+ rep_significand >>= -lshift;
+
+ const RetT rep(
+ encode(sign, Exponent(rep_exponent), Significand(rep_significand)));
+
+ return round_toward_inf ? rep.next_toward_inf() : rep;
+ }
+
+ private:
+ // This operation changes the scale of the Number by offsetting the exponent
+ // and shifting the significand.
+ LIBC_INLINE constexpr Number get_scaled(int offset) const {
+ if (offset == 0)
+ return *this;
+ Number num;
+ num.sign = sign;
+ num.exponent = exponent + offset;
+ num.significand = offset == 0 ? significand
+ : (offset > 0 ? significand >> offset
+ : significand << -offset);
+ return num;
+ }
+ };
+
+ // Returns a 'Number' representation of the number, the returned number
+ // may or may not be normalized (leading bit of the significant at MSB
+ // position). Only valid to call when is_finite().
+ LIBC_INLINE constexpr Number get_number() const {
+ Number num;
+ num.sign = sign();
+ num.exponent = get_explicit_exponent() + Number::EXTRA_PRECISION;
+ num.significand = get_explicit_mantissa();
+ return num;
+ }
};
// A generic class to manipulate floating point formats.
diff --git a/libc/test/src/__support/FPUtil/fpbits_test.cpp b/libc/test/src/__support/FPUtil/fpbits_test.cpp
index 4504a4f0cfcc7d..2305eed3866640 100644
--- a/libc/test/src/__support/FPUtil/fpbits_test.cpp
+++ b/libc/test/src/__support/FPUtil/fpbits_test.cpp
@@ -327,6 +327,264 @@ TYPED_TEST(LlvmLibcFPBitsTest, NextTowardInf, FPTypes) {
}
}
+TYPED_TEST(LlvmLibcFPBitsTest, NumberConstruction, FPTypes) {
+ using LIBC_NAMESPACE::cpp::countl_zero;
+ using LIBC_NAMESPACE::cpp::countr_zero;
+ using Number = typename T::Number;
+
+ // When using get_number() the significand is transferred as-is and the
+ // exponent is adjusted to reflect the extra precision (now the significand
+ // uses (STORAGE_LEN - 1) bits instead of FRACTION_LEN bits).
+
+ // e.g., with IEEE754_Binary16
+ // 1.0 in IEEE754_Binary16 : 0b0011110000000000
+ // SEEEEEMMMMMMMMMM
+ // number's significand : 0b0000010000000000
+ // EXTRA_PRECISION : ^^^^^
+ // number's exponent : EXTRA_PRECISION
+
+ const T one = T::one();
+
+ const Number num = one.get_number();
+
+ // "num" and "one" have the same sign.
+ ASSERT_EQ(num.sign.is_pos(), one.is_pos());
+
+ // For 'one', the leading one of the significand is at position FRACTION_LEN.
+ // So we have FRACTION_LEN zeroes after it.
+ ASSERT_EQ(countr_zero(num.significand), T::FRACTION_LEN);
+
+ // The exponent is increased by EXTRA_PRECISION.
+ // Since the exponent for 'one' is '0' the number's exponent is just
+ // EXTRA_PRECISION.
+ ASSERT_EQ(num.exponent, Number::EXTRA_PRECISION);
+
+ // Because the significand is now stored in 'StorageType' we have extra
+ // precision bits available at the left of the leading one.
+ ASSERT_GT(Number::EXTRA_PRECISION, 0);
+ ASSERT_EQ(countl_zero(num.significand), Number::EXTRA_PRECISION);
+
+ // In maximized precision form, the leading one is moved at StorageType's MSB.
+ // number's significand : 0b1000000000000000
+ // number's exponent : 0
+ const Number max_precision = one.get_number().maximize_precision();
+ ASSERT_TRUE(max_precision.sign.is_pos());
+ // The leading bit is now in the MSB of the storage.
+ ASSERT_EQ(countl_zero(max_precision.significand), 0);
+ ASSERT_EQ(max_precision.exponent, 0);
+
+ // In minimized precision form, the leading one is moved at StorageType's LSB.
+ // number's significand : 0b0000000000000001
+ // number's exponent : FRACTION_LEN + EXTRA_PRECISION
+ const Number min_precision = one.get_number().minimize_precision();
+ ASSERT_TRUE(min_precision.sign.is_pos());
+ // The leading bit is now in the LSB of the storage.
+ ASSERT_EQ(countr_zero(min_precision.significand), 0);
+ ASSERT_EQ(min_precision.exponent, T::FRACTION_LEN + Number::EXTRA_PRECISION);
+}
+
+#define ASSERT_MATERIALIZE_AS(NUMBER, ROUNDING, PRECISION, REP) \
+ ASSERT_SAME_REP(NUMBER.materialize(ROUNDING, PRECISION), REP)
+
+// For all 'FPType' and all finite 'FP' values, we check that we can convert the
+// 'FPRep' to a 'Number' and back to the original 'FPRep' without loss.
+// We also check that changing the scale of the intermediary 'Number' has no
+// effect.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberBackAndForth, FPTypes) {
+ // using StorageType = typename T::StorageType;
+ using Number = typename T::Number;
+ for (Sign sign : all_signs) {
+ for (FP fp : all_fp_values) {
+ const T rep = make<T>(sign, fp);
+ if (!rep.is_finite())
+ continue;
+ // We test numbers at different scales.
+ // Note: changing scale changes the internal representation but not the
+ // Number's value.
+ const Number scaled_numbers[] = {
+ rep.get_number(),
+ rep.get_number().maximize_precision(),
+ rep.get_number().minimize_precision(),
+ };
+ for (const Number &num : scaled_numbers) {
+ // When numbers are exact (i.e., not truncated) they should materialize
+ // back exactly whatever the rounding mode.
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rep);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
+ }
+ }
+ }
+}
+
+// Here we test materialization of a 'Number' back to an 'FPRep' with the
+// 'TOWARDZERO' rounding mode. This rounding mode corresponds to C++ cast
+// semantics and simply discards the extra precision.
+// That is, whatever the values of the extra bits, 'Number' will materialize
+// back as 'FPRep' exactly.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundTowardZero, FPTypes) {
+ using StorageType = typename T::StorageType;
+ using Number = typename T::Number;
+ static constexpr StorageType EXTRA_BITS_MASK =
+ LIBC_NAMESPACE::mask_trailing_ones<StorageType,
+ Number::EXTRA_PRECISION>();
+ for (Sign sign : all_signs) {
+ for (FP fp : all_fp_values) {
+ const T rep = make<T>(sign, fp);
+ if (!rep.is_finite())
+ continue;
+ // Number with EXTRA_PRECISION bits.
+ Number num = rep.get_number().maximize_precision();
+
+ // Exact number converts back to rep.
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+ // Non-exact numbers convert back to rep.
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
+
+ if (rep.is_zero())
+ continue; // extra bits are only present for non-zero numbers.
+
+ ++num.significand; // Smallest extra bits value.
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
+ num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
+ }
+ }
+}
+
+// Here we test materialization of a 'Number' back to an 'FPRep' with the
+// 'AWAYZERO' rounding mode. This rounding mode will convert back to 'FPRep'
+// only if there is no extra bit set and Truncation is 'EXACT', otherwise it
+// will materialize as the next representable number.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundAwayZero, FPTypes) {
+ using StorageType = typename T::StorageType;
+ using Number = typename T::Number;
+ static constexpr StorageType EXTRA_BITS_MASK =
+ LIBC_NAMESPACE::mask_trailing_ones<StorageType,
+ Number::EXTRA_PRECISION>();
+ const struct {
+ FP initial;
+ FP rounded;
+ } TESTS[] = {
+ {FP::ZERO, FP::MIN_SUBNORMAL}, //
+ {FP::MAX_SUBNORMAL, FP::MIN_NORMAL}, //
+ {FP::MAX_NORMAL, FP::INF}, //
+ {FP::INF, FP::INF}, //
+ {FP::QUIET_NAN, FP::QUIET_NAN}, //
+ {FP::SIGNALING_NAN, FP::SIGNALING_NAN}, //
+ };
+ for (Sign sign : all_signs) {
+ for (auto tc : TESTS) {
+ const T rep = make<T>(sign, tc.initial);
+ const T rounded = make<T>(sign, tc.rounded);
+ Number num = rep.get_number().maximize_precision();
+
+ // Exact number converts back to rep.
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rep);
+ // Non-exact numbers get rounded toward infinity.
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
+
+ if (rep.is_zero())
+ continue; // extra bits are only present for non-zero numbers.
+
+ ++num.significand; // Smallest extra bits value.
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rounded);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
+ num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rounded);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
+ }
+ }
+}
+
+// Here we test materialization of a 'Number' back to an 'FPRep' with the
+// 'TONEAREST' rounding mode. This rounding mode will convert back to 'FPRep'
+// when the extra bits are below the halfway point, and will materialize as
+// the next representable number when they are above it.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundToNearest, FPTypes) {
+ using StorageType = typename T::StorageType;
+ using Number = typename T::Number;
+ static constexpr StorageType EXTRA_BITS_MASK =
+ LIBC_NAMESPACE::mask_trailing_ones<StorageType,
+ Number::EXTRA_PRECISION>();
+ const struct {
+ FP initial;
+ FP rounded;
+ } TESTS[] = {
+ {FP::ZERO, FP::MIN_SUBNORMAL}, //
+ {FP::MAX_SUBNORMAL, FP::MIN_NORMAL}, //
+ {FP::MAX_NORMAL, FP::INF}, //
+ {FP::INF, FP::INF}, //
+ {FP::QUIET_NAN, FP::QUIET_NAN}, //
+ {FP::SIGNALING_NAN, FP::SIGNALING_NAN}, //
+ };
+ for (Sign sign : all_signs) {
+ for (auto tc : TESTS) {
+ const T rep = make<T>(sign, tc.initial);
+ const T rounded = make<T>(sign, tc.rounded);
+ Number num = rep.get_number().maximize_precision();
+
+ // Exact number converts back to rep.
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
+ // Non-exact numbers convert back to rep.
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rep);
+
+ if (rep.is_zero())
+ continue; // extra bits are only present for non-zero numbers.
+
+ ++num.significand; // Smallest extra bits value.
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rep);
+ num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rounded);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rounded);
+ }
+ }
+}
+
+// We test the materialization of
+TYPED_TEST(LlvmLibcFPBitsTest, SmallestNumber, FPTypes) {
+ using StorageType = typename T::StorageType;
+ using Number = typename T::Number;
+ for (Sign sign : all_signs) {
+ Number num;
+ num.sign = sign;
+ num.exponent = INT32_MIN;
+ num.significand = StorageType(1);
+
+ const T zero = make<T>(sign, FP::ZERO);
+ const T min = make<T>(sign, FP::MIN_SUBNORMAL);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, zero);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, zero);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, min);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, min);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, zero);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, zero);
+ }
+}
+
+TYPED_TEST(LlvmLibcFPBitsTest, LargestNumber, FPTypes) {
+ using StorageType = typename T::StorageType;
+ using Number = typename T::Number;
+ for (Sign sign : all_signs) {
+ Number num;
+ num.sign = sign;
+ num.exponent = INT32_MAX;
+ num.significand = ~StorageType(0);
+
+ const T inf = make<T>(sign, FP::INF);
+ const T max = make<T>(sign, FP::MAX_NORMAL);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, max);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, max);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, inf);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, inf);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, inf);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, inf);
+ }
+}
+
TEST(LlvmLibcFPBitsTest, FloatType) {
using FloatBits = FPBits<float>;
>From 08e6c466be5fcc1a2330804b44fbf69796ac9fd2 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Wed, 7 Feb 2024 11:21:28 +0000
Subject: [PATCH 2/2] Add more tests, fix rounding bugs
---
libc/src/__support/FPUtil/FPBits.h | 87 +++++++---
.../test/src/__support/FPUtil/fpbits_test.cpp | 149 +++++++++++-------
2 files changed, 154 insertions(+), 82 deletions(-)
diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h
index 6ce3a1bb08ffa5..d83db080c97e18 100644
--- a/libc/src/__support/FPUtil/FPBits.h
+++ b/libc/src/__support/FPUtil/FPBits.h
@@ -20,6 +20,12 @@
#include <stdint.h>
+#include <stdio.h> // DO NOT SUBMIT
+#define eprintf(...) \
+ if constexpr (sizeof(StorageType) == 0) { \
+ ::fprintf(::stderr, __VA_ARGS__); \
+ }
+
namespace LIBC_NAMESPACE {
namespace fputil {
@@ -821,55 +827,89 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
enum Precision { TRUNCATED, EXACT };
// Creates a 'RetT' from the number representation.
- // When this Number is too large to be represented 'infinity' is returned.
- // When this Number is too small to be represented 'zero' or 'min_subnormal'
- // is returned depending on the rounding mode.
+ // - When this 'Number' is too large to be represented 'infinity' or
+ // 'max_normal' is returned depending on the rounding mode.
+ // - When this 'Number' is too small to be represented 'zero' or
+ // 'min_subnormal' is returned depending on the rounding mode.
LIBC_INLINE constexpr RetT materialize(Rounding rounding = TOWARDZERO,
Precision precision = EXACT) const {
- if (exponent <= (INT32_MIN + UP::STORAGE_LEN))
- return rounding == AWAYZERO ? RetT::min_subnormal(sign)
- : RetT::zero(sign);
if (is_zero())
return precision == TRUNCATED && rounding == AWAYZERO
? RetT::min_subnormal(sign)
: RetT::zero(sign);
- if (exponent >= (INT32_MAX - UP::STORAGE_LEN))
+
+ const auto underflow = [&]() -> RetT {
+ return rounding == AWAYZERO ? RetT::min_subnormal(sign)
+ : RetT::zero(sign);
+ };
+ const auto overflow = [&]() -> RetT {
return rounding == TOWARDZERO ? RetT::max_normal(sign)
: RetT::inf(sign);
+ };
const int leading_zeroes = cpp::countl_zero(significand);
- const int extra_len = EXTRA_PRECISION - leading_zeroes;
- // 'extra_len' is smaller than 'STORAGE_LEN' by definition.
- static_assert(EXTRA_PRECISION < UP::STORAGE_LEN);
- const StorageType extra_bits_mask =
- extra_len <= 0 ? StorageType(0)
- : (StorageType(1) << extra_len) - StorageType(1);
- const StorageType extra_bits = significand & extra_bits_mask;
- const StorageType extra_bits_midpoint = extra_bits_mask >> 1;
- const bool round_toward_inf =
- (rounding == AWAYZERO && (extra_bits || precision == TRUNCATED)) ||
- (rounding == TONEAREST &&
- ((extra_bits > extra_bits_midpoint) ||
- ((extra_bits == extra_bits_midpoint) && (precision == TRUNCATED))));
+ LIBC_ASSERT(leading_zeroes <= UP::STORAGE_LEN);
+ // If 'exponent' is too small 'exponent - leading_zeroes' below can
+ // overflow which is undefined behavior for signed integers. If exponent
+ // is too close to INT32_MIN we bail out and return the appropriate
+ // underflow value.
+ constexpr int32_t smallest_exponent = INT32_MIN + UP::STORAGE_LEN;
+ if (exponent <= smallest_exponent)
+ return underflow();
+
+ // The exponent when the leading bit is at its final position.
int32_t rep_exponent = exponent - leading_zeroes;
- constexpr int32_t EXP_MIN = (int32_t)Exponent::MIN();
- constexpr int32_t EXP_SUBNORMAL = (int32_t)Exponent::SUBNORMAL();
+
+ constexpr int32_t EXP_MAX(Exponent::MAX());
+ constexpr int32_t EXP_MIN(Exponent::MIN());
+ constexpr int32_t EXP_SUBNORMAL(Exponent::SUBNORMAL());
int lshift = leading_zeroes - EXTRA_PRECISION;
+
+ // Adjust shift and exponent when the number is subnormal.
if (rep_exponent < EXP_MIN) {
lshift -= EXP_MIN - rep_exponent;
rep_exponent = EXP_SUBNORMAL;
}
+ // The final significand shifted accordingly.
StorageType rep_significand = significand;
if (lshift > 0)
rep_significand <<= lshift;
else if (lshift < 0)
rep_significand >>= -lshift;
+ // The number of extra precision bits we have in 'significand'.
+ const int extra_len = -lshift;
+
+ if (extra_len > UP::STORAGE_LEN)
+ return underflow();
+
+ if (rep_exponent > EXP_MAX)
+ return overflow();
+
+ // When rounding is AWAYZERO or TONEAREST we need to consider extra
+ // precision bits.
+ LIBC_ASSERT(extra_len <= UP::STORAGE_LEN);
+ const bool has_extra_len = extra_len > 0;
+ StorageType extra_bits_mask{};
+ StorageType extra_bits_midpoint{};
+ if (has_extra_len) {
+ if (extra_len == UP::STORAGE_LEN)
+ extra_bits_mask = StorageType(~(StorageType(0))); // subnormals
+ else
+ extra_bits_mask = (StorageType(1) << extra_len) - StorageType(1);
+ extra_bits_midpoint = (extra_bits_mask >> 1) + StorageType(1);
+ }
+ const StorageType extra_bits = significand & extra_bits_mask;
+ const bool round_toward_inf =
+ (rounding == AWAYZERO &&
+ ((extra_bits > 0) || (precision == TRUNCATED))) ||
+ (rounding == TONEAREST &&
+ ((extra_bits > extra_bits_midpoint) ||
+ ((extra_bits == extra_bits_midpoint) && (precision == TRUNCATED))));
const RetT rep(
encode(sign, Exponent(rep_exponent), Significand(rep_significand)));
-
return round_toward_inf ? rep.next_toward_inf() : rep;
}
@@ -893,6 +933,7 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
// may or may not be normalized (leading bit of the significant at MSB
// position). Only valid to call when is_finite().
LIBC_INLINE constexpr Number get_number() const {
+ LIBC_ASSERT(is_finite());
Number num;
num.sign = sign();
num.exponent = get_explicit_exponent() + Number::EXTRA_PRECISION;
diff --git a/libc/test/src/__support/FPUtil/fpbits_test.cpp b/libc/test/src/__support/FPUtil/fpbits_test.cpp
index 2305eed3866640..22f930720fbd8d 100644
--- a/libc/test/src/__support/FPUtil/fpbits_test.cpp
+++ b/libc/test/src/__support/FPUtil/fpbits_test.cpp
@@ -239,10 +239,13 @@ constexpr FP all_fp_values[] = {
constexpr Sign all_signs[] = {Sign::POS, Sign::NEG};
-using FPTypes = LIBC_NAMESPACE::testing::TypeList<
- FPRep<FPType::IEEE754_Binary16>, FPRep<FPType::IEEE754_Binary32>,
- FPRep<FPType::IEEE754_Binary64>, FPRep<FPType::IEEE754_Binary128>,
- FPRep<FPType::X86_Binary80>>;
+using FPTypes =
+ LIBC_NAMESPACE::testing::TypeList<FPRep<FPType::IEEE754_Binary16>, //
+ FPRep<FPType::IEEE754_Binary32>, //
+ FPRep<FPType::IEEE754_Binary64>, //
+ FPRep<FPType::IEEE754_Binary128>, //
+ FPRep<FPType::X86_Binary80> //
+ >;
template <typename T> constexpr auto make(Sign sign, FP fp) {
switch (fp) {
@@ -425,15 +428,15 @@ TYPED_TEST(LlvmLibcFPBitsTest, NumberBackAndForth, FPTypes) {
TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundTowardZero, FPTypes) {
using StorageType = typename T::StorageType;
using Number = typename T::Number;
- static constexpr StorageType EXTRA_BITS_MASK =
- LIBC_NAMESPACE::mask_trailing_ones<StorageType,
- Number::EXTRA_PRECISION>();
+ constexpr auto set_last_bits = [](StorageType value, int bits) {
+ return value | ((StorageType(1) << bits) - StorageType(1));
+ };
for (Sign sign : all_signs) {
for (FP fp : all_fp_values) {
const T rep = make<T>(sign, fp);
if (!rep.is_finite())
continue;
- // Number with EXTRA_PRECISION bits.
+ // Number with extra precision bits.
Number num = rep.get_number().maximize_precision();
// Exact number converts back to rep.
@@ -444,10 +447,14 @@ TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundTowardZero, FPTypes) {
if (rep.is_zero())
continue; // extra bits are only present for non-zero numbers.
- ++num.significand; // Smallest extra bits value.
+ const auto sig = num.significand;
+ num.significand = set_last_bits(sig, 1); // Smallest extra bits value.
ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
- num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+ if (rep.is_subnormal()) // Largest extra bits value.
+ num.significand = set_last_bits(sig, Number::EXTRA_PRECISION + 1);
+ else
+ num.significand = set_last_bits(sig, Number::EXTRA_PRECISION);
ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
}
@@ -461,24 +468,22 @@ TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundTowardZero, FPTypes) {
TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundAwayZero, FPTypes) {
using StorageType = typename T::StorageType;
using Number = typename T::Number;
- static constexpr StorageType EXTRA_BITS_MASK =
- LIBC_NAMESPACE::mask_trailing_ones<StorageType,
- Number::EXTRA_PRECISION>();
+ constexpr auto set_last_bits = [](StorageType value, int bits) {
+ return value | ((StorageType(1) << bits) - StorageType(1));
+ };
const struct {
FP initial;
FP rounded;
} TESTS[] = {
- {FP::ZERO, FP::MIN_SUBNORMAL}, //
- {FP::MAX_SUBNORMAL, FP::MIN_NORMAL}, //
- {FP::MAX_NORMAL, FP::INF}, //
- {FP::INF, FP::INF}, //
- {FP::QUIET_NAN, FP::QUIET_NAN}, //
- {FP::SIGNALING_NAN, FP::SIGNALING_NAN}, //
+ {FP::ZERO, FP::MIN_SUBNORMAL}, //
+ {FP::MAX_SUBNORMAL, FP::MIN_NORMAL}, //
+ {FP::MAX_NORMAL, FP::INF}, //
};
for (Sign sign : all_signs) {
for (auto tc : TESTS) {
const T rep = make<T>(sign, tc.initial);
const T rounded = make<T>(sign, tc.rounded);
+ // Number with extra precision bits.
Number num = rep.get_number().maximize_precision();
// Exact number converts back to rep.
@@ -489,10 +494,14 @@ TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundAwayZero, FPTypes) {
if (rep.is_zero())
continue; // extra bits are only present for non-zero numbers.
- ++num.significand; // Smallest extra bits value.
+ const auto sig = num.significand;
+ num.significand = set_last_bits(sig, 1); // Smallest extra bits value.
ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rounded);
ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
- num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+ if (rep.is_subnormal()) // Largest extra bits value.
+ num.significand = set_last_bits(sig, Number::EXTRA_PRECISION + 1);
+ else
+ num.significand = set_last_bits(sig, Number::EXTRA_PRECISION);
ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rounded);
ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
}
@@ -506,19 +515,19 @@ TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundAwayZero, FPTypes) {
TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundToNearest, FPTypes) {
using StorageType = typename T::StorageType;
using Number = typename T::Number;
- static constexpr StorageType EXTRA_BITS_MASK =
- LIBC_NAMESPACE::mask_trailing_ones<StorageType,
- Number::EXTRA_PRECISION>();
+ constexpr auto set_last_bits = [](StorageType value, int bits) {
+ return value | ((StorageType(1) << bits) - StorageType(1));
+ };
+ constexpr auto set_bit_at = [](StorageType value, int pos) {
+ return value | (StorageType(1) << (pos - 1));
+ };
const struct {
FP initial;
FP rounded;
} TESTS[] = {
- {FP::ZERO, FP::MIN_SUBNORMAL}, //
- {FP::MAX_SUBNORMAL, FP::MIN_NORMAL}, //
- {FP::MAX_NORMAL, FP::INF}, //
- {FP::INF, FP::INF}, //
- {FP::QUIET_NAN, FP::QUIET_NAN}, //
- {FP::SIGNALING_NAN, FP::SIGNALING_NAN}, //
+ {FP::ZERO, FP::MIN_SUBNORMAL}, //
+ {FP::MAX_SUBNORMAL, FP::MIN_NORMAL}, //
+ {FP::MAX_NORMAL, FP::INF}, //
};
for (Sign sign : all_signs) {
for (auto tc : TESTS) {
@@ -534,54 +543,76 @@ TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundToNearest, FPTypes) {
if (rep.is_zero())
continue; // extra bits are only present for non-zero numbers.
- ++num.significand; // Smallest extra bits value.
+ const auto sig = num.significand;
+ num.significand = set_last_bits(sig, 1); // Smallest extra bits value.
ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rep);
- num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+ if (rep.is_subnormal()) // Largest extra bits value.
+ num.significand = set_last_bits(sig, Number::EXTRA_PRECISION + 1);
+ else
+ num.significand = set_last_bits(sig, Number::EXTRA_PRECISION);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rounded);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rounded);
+ if (rep.is_subnormal()) // Half extra bits value.
+ num.significand = set_bit_at(sig, Number::EXTRA_PRECISION + 1);
+ else
+ num.significand = set_bit_at(sig, Number::EXTRA_PRECISION);
+ // We're exactly half-way between two numbers.
+ // If exact we round toward zero.
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
+ // If truncated we round toward infinity.
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rounded);
+ // The next value will always round toward infinity.
+ ++num.significand;
ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rounded);
ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rounded);
}
}
}
-// We test the materialization of
TYPED_TEST(LlvmLibcFPBitsTest, SmallestNumber, FPTypes) {
using StorageType = typename T::StorageType;
using Number = typename T::Number;
+ constexpr int32_t exponents[] = {INT32_MIN, INT32_MIN / 2};
for (Sign sign : all_signs) {
- Number num;
- num.sign = sign;
- num.exponent = INT32_MIN;
- num.significand = StorageType(1);
-
- const T zero = make<T>(sign, FP::ZERO);
- const T min = make<T>(sign, FP::MIN_SUBNORMAL);
- ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, zero);
- ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, zero);
- ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, min);
- ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, min);
- ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, zero);
- ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, zero);
+ for (int32_t exponent : exponents) {
+ Number num;
+ num.sign = sign;
+ num.exponent = exponent;
+ num.significand = StorageType(1);
+
+ const T zero = make<T>(sign, FP::ZERO);
+ const T min = make<T>(sign, FP::MIN_SUBNORMAL);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, zero);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, zero);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, min);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, min);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, zero);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, zero);
+ }
}
}
TYPED_TEST(LlvmLibcFPBitsTest, LargestNumber, FPTypes) {
using StorageType = typename T::StorageType;
using Number = typename T::Number;
+ constexpr int32_t exponents[] = {INT32_MAX, INT32_MAX / 2};
for (Sign sign : all_signs) {
- Number num;
- num.sign = sign;
- num.exponent = INT32_MAX;
- num.significand = ~StorageType(0);
-
- const T inf = make<T>(sign, FP::INF);
- const T max = make<T>(sign, FP::MAX_NORMAL);
- ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, max);
- ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, max);
- ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, inf);
- ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, inf);
- ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, inf);
- ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, inf);
+ for (int32_t exponent : exponents) {
+ Number num;
+ num.sign = sign;
+ num.exponent = exponent;
+ num.significand = ~StorageType(0);
+
+ const T max = make<T>(sign, FP::MAX_NORMAL);
+ const T inf = make<T>(sign, FP::INF);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, max);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, max);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, inf);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, inf);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, inf);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, inf);
+ }
}
}
More information about the libc-commits
mailing list