[libc-commits] [libc] [libc] Add conversions between `FPBits` and greater precision floating point representations (PR #80709)
Guillaume Chatelet via libc-commits
libc-commits at lists.llvm.org
Mon Feb 5 08:46:50 PST 2024
https://github.com/gchatelet created https://github.com/llvm/llvm-project/pull/80709
None
>From 5c7021657b1eeadcbac9c65df945909aadbab618 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Mon, 5 Feb 2024 16:46:27 +0000
Subject: [PATCH] [libc] Add conversions between `FPBits` and greater precision
floating point representations
---
libc/src/__support/FPUtil/FPBits.h | 130 +++++++++
.../test/src/__support/FPUtil/fpbits_test.cpp | 258 ++++++++++++++++++
2 files changed, 388 insertions(+)
diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h
index 6665c90845683..6ce3a1bb08ffa 100644
--- a/libc/src/__support/FPUtil/FPBits.h
+++ b/libc/src/__support/FPUtil/FPBits.h
@@ -769,6 +769,136 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
}
return RetT(result.uintval());
}
+
+ // 'Number' represents a finite (non-inf, non-NaN) floating point number. It
+ // is independent of the underlying encoding and allows for easy manipulation
+ // of sign, exponent and significand. This format's precision is larger than
+ // the encoded form. There is no notion of subnormal for a 'Number'.
+ struct Number {
+
+ // The number of extra significand bits that StorageType offers compared to
+ // the encoded form, i.e. STORAGE_LEN - (FRACTION_LEN + 1).
+ LIBC_INLINE_VAR static constexpr int EXTRA_PRECISION =
+ UP::STORAGE_LEN - UP::FRACTION_LEN - 1;
+
+ Sign sign = Sign::POS; // Sign of the number, positive by default.
+ int32_t exponent = 0; // Binary exponent paired with 'significand'.
+ StorageType significand = 0; // Significand bits, no implicit leading one.
+
+ LIBC_INLINE constexpr bool is_zero() const { return significand == 0; } // Ignores sign and exponent.
+
+ // Moves the leading one of the significand to StorageType's MSB position
+ // and changes the exponent accordingly. This changes the internal
+ // representation to maximize the precision of the Number but it doesn't
+ // change its value. Returns the rescaled copy; '*this' is left untouched.
+ LIBC_INLINE constexpr Number maximize_precision() const {
+ return get_scaled(-cpp::countl_zero(significand)); // Negative offset: left shift.
+ }
+
+ // Moves the trailing one of the significand to StorageType's LSB position
+ // and changes the exponent accordingly. This changes the internal
+ // representation to minimize the precision of the Number but it doesn't
+ // change its value. Returns the rescaled copy; '*this' is left untouched.
+ LIBC_INLINE constexpr Number minimize_precision() const {
+ return get_scaled(cpp::countr_zero(significand)); // Positive offset: right shift.
+ }
+
+ // If non-zero, normalizes this number by moving the leading bit of the
+ // significand to StorageType's MSB position (maximize_precision). If zero,
+ // keeps the sign and resets the exponent to 0.
+ LIBC_INLINE constexpr Number normalize() const {
+ if (is_zero())
+ return {sign, 0, significand}; // 'significand' is 0 on this path.
+ return maximize_precision();
+ }
+
+ // The rounding mode to use when materializing a Number (see 'materialize').
+ enum Rounding { TOWARDZERO, AWAYZERO, TONEAREST };
+
+ // The precision to use when materializing a Number (see 'materialize').
+ // - EXACT means this Number contains all the information,
+ // - TRUNCATED means that the significand was truncated.
+ enum Precision { TRUNCATED, EXACT };
+
+ // Creates a 'RetT' from the number representation.
+ // When this Number is too large to be represented 'infinity' (or 'max_normal'
+ // for TOWARDZERO) is returned. When it is too small to be represented 'zero'
+ // or 'min_subnormal' is returned depending on the rounding mode.
+ LIBC_INLINE constexpr RetT materialize(Rounding rounding = TOWARDZERO,
+ Precision precision = EXACT) const {
+ if (exponent <= (INT32_MIN + UP::STORAGE_LEN)) // presumably guards the exponent arithmetic below against underflow - confirm
+ return rounding == AWAYZERO ? RetT::min_subnormal(sign)
+ : RetT::zero(sign);
+ if (is_zero()) // A truncated zero still rounds away from zero.
+ return precision == TRUNCATED && rounding == AWAYZERO
+ ? RetT::min_subnormal(sign)
+ : RetT::zero(sign);
+ if (exponent >= (INT32_MAX - UP::STORAGE_LEN)) // NOTE(review): no comparison against the format's own maximum exponent is visible in this function - confirm
+ return rounding == TOWARDZERO ? RetT::max_normal(sign)
+ : RetT::inf(sign);
+
+ const int leading_zeroes = cpp::countl_zero(significand);
+ const int extra_len = EXTRA_PRECISION - leading_zeroes; // Low bits kept out of the encoded significand.
+ // 'extra_len' is smaller than 'STORAGE_LEN' by definition.
+ static_assert(EXTRA_PRECISION < UP::STORAGE_LEN);
+ const StorageType extra_bits_mask =
+ extra_len <= 0 ? StorageType(0)
+ : (StorageType(1) << extra_len) - StorageType(1);
+ const StorageType extra_bits = significand & extra_bits_mask;
+ const StorageType extra_bits_midpoint = extra_bits_mask >> 1; // All extra bits set except the topmost one.
+ const bool round_toward_inf =
+ (rounding == AWAYZERO && (extra_bits || precision == TRUNCATED)) ||
+ (rounding == TONEAREST &&
+ ((extra_bits > extra_bits_midpoint) ||
+ ((extra_bits == extra_bits_midpoint) && (precision == TRUNCATED))));
+ int32_t rep_exponent = exponent - leading_zeroes;
+ constexpr int32_t EXP_MIN = (int32_t)Exponent::MIN();
+ constexpr int32_t EXP_SUBNORMAL = (int32_t)Exponent::SUBNORMAL();
+
+ int lshift = leading_zeroes - EXTRA_PRECISION; // Positive: shift left, negative: shift right.
+ if (rep_exponent < EXP_MIN) { // Below EXP_MIN: shift further right and use the subnormal exponent.
+ lshift -= EXP_MIN - rep_exponent;
+ rep_exponent = EXP_SUBNORMAL;
+ }
+
+ StorageType rep_significand = significand;
+ if (lshift > 0)
+ rep_significand <<= lshift;
+ else if (lshift < 0)
+ rep_significand >>= -lshift; // NOTE(review): '-lshift' is not bounded by STORAGE_LEN on the subnormal path - confirm the shift width is safe
+
+ const RetT rep(
+ encode(sign, Exponent(rep_exponent), Significand(rep_significand)));
+
+ return round_toward_inf ? rep.next_toward_inf() : rep; // Rounding is applied as a single step toward infinity.
+ }
+
+ private:
+ // Returns a copy of this Number whose exponent is offset by 'offset' and
+ // whose significand is shifted accordingly: right when 'offset' is positive,
+ // left when it is negative. An 'offset' of 0 returns the Number unchanged.
+ // A shift of STORAGE_LEN bits or more (e.g. when a zero significand is
+ // scaled by its own zero count) yields a zero significand.
+ LIBC_INLINE constexpr Number get_scaled(int offset) const {
+ if (offset == 0)
+ return *this;
+ const int shift = offset > 0 ? offset : -offset;
+ Number num;
+ num.sign = sign;
+ num.exponent = exponent + offset;
+ // Shifting a builtin integer by its full width or more is undefined, so
+ // that case is handled explicitly instead of being left to the shift.
+ if (shift >= UP::STORAGE_LEN)
+ num.significand = StorageType(0);
+ else
+ num.significand = offset > 0 ? significand >> shift : significand << shift;
+ return num;
+ }
+ };
+
+ // Returns a 'Number' representation of the number, the returned number
+ // may or may not be normalized (leading bit of the significand at MSB
+ // position). Only valid to call when is_finite().
+ LIBC_INLINE constexpr Number get_number() const {
+ Number num;
+ num.sign = sign();
+ num.exponent = get_explicit_exponent() + Number::EXTRA_PRECISION; // Accounts for the extra bits above the significand.
+ num.significand = get_explicit_mantissa(); // Copied as-is, not shifted.
+ return num;
+ }
};
// A generic class to manipulate floating point formats.
diff --git a/libc/test/src/__support/FPUtil/fpbits_test.cpp b/libc/test/src/__support/FPUtil/fpbits_test.cpp
index 4504a4f0cfcc7..2305eed386664 100644
--- a/libc/test/src/__support/FPUtil/fpbits_test.cpp
+++ b/libc/test/src/__support/FPUtil/fpbits_test.cpp
@@ -327,6 +327,264 @@ TYPED_TEST(LlvmLibcFPBitsTest, NextTowardInf, FPTypes) {
}
}
+TYPED_TEST(LlvmLibcFPBitsTest, NumberConstruction, FPTypes) {
+ using LIBC_NAMESPACE::cpp::countl_zero;
+ using LIBC_NAMESPACE::cpp::countr_zero;
+ using Number = typename T::Number;
+
+ // When using get_number() the significand is transferred as-is and the
+ // exponent is adjusted to reflect the extra precision (now the significand
+ // uses (STORAGE_LEN - 1) bits instead of FRACTION_LEN bits).
+
+ // e.g., with IEEE754_Binary16
+ // 1.0 in IEEE754_Binary16 : 0b0011110000000000
+ //                              SEEEEEMMMMMMMMMM
+ // number's significand    : 0b0000010000000000
+ // EXTRA_PRECISION         :   ^^^^^
+ // number's exponent       : EXTRA_PRECISION
+
+ const T one = T::one();
+
+ const Number num = one.get_number();
+
+ // "num" and "one" have the same sign.
+ ASSERT_EQ(num.sign.is_pos(), one.is_pos());
+
+ // For 'one', the leading one of the significand is at position FRACTION_LEN.
+ // So we have FRACTION_LEN zeroes after it.
+ ASSERT_EQ(countr_zero(num.significand), T::FRACTION_LEN);
+
+ // The exponent is increased by EXTRA_PRECISION.
+ // Since the exponent for 'one' is '0' the number's exponent is just
+ // EXTRA_PRECISION.
+ ASSERT_EQ(num.exponent, Number::EXTRA_PRECISION);
+
+ // Because the significand is now stored in 'StorageType' we have extra
+ // precision bits available at the left of the leading one.
+ ASSERT_GT(Number::EXTRA_PRECISION, 0);
+ ASSERT_EQ(countl_zero(num.significand), Number::EXTRA_PRECISION);
+
+ // In maximized precision form, the leading one is moved to StorageType's MSB.
+ // number's significand : 0b1000000000000000
+ // number's exponent    : 0
+ const Number max_precision = one.get_number().maximize_precision();
+ ASSERT_TRUE(max_precision.sign.is_pos());
+ // The leading bit is now in the MSB of the storage.
+ ASSERT_EQ(countl_zero(max_precision.significand), 0);
+ ASSERT_EQ(max_precision.exponent, 0);
+
+ // In minimized precision form, the leading one is moved to StorageType's LSB.
+ // number's significand : 0b0000000000000001
+ // number's exponent    : FRACTION_LEN + EXTRA_PRECISION
+ const Number min_precision = one.get_number().minimize_precision();
+ ASSERT_TRUE(min_precision.sign.is_pos());
+ // The leading bit is now in the LSB of the storage.
+ ASSERT_EQ(countr_zero(min_precision.significand), 0);
+ ASSERT_EQ(min_precision.exponent, T::FRACTION_LEN + Number::EXTRA_PRECISION);
+}
+
+// Asserts that materializing NUMBER with the given ROUNDING and PRECISION has
+// the same representation as REP. NUMBER is parenthesized so that any
+// expression can be passed, not only a plain identifier.
+#define ASSERT_MATERIALIZE_AS(NUMBER, ROUNDING, PRECISION, REP) \
+ ASSERT_SAME_REP((NUMBER).materialize(ROUNDING, PRECISION), REP)
+
+// For all 'FPType' and all finite 'FP' values, we check that we can convert the
+// 'FPRep' to a 'Number' and back to the original 'FPRep' without loss.
+// We also check that changing the scale of the intermediary 'Number' has no
+// effect.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberBackAndForth, FPTypes) {
+ // 'StorageType' is not needed here: the significand is never edited directly.
+ using Number = typename T::Number;
+ for (Sign sign : all_signs) {
+ for (FP fp : all_fp_values) {
+ const T rep = make<T>(sign, fp);
+ if (!rep.is_finite())
+ continue;
+ // We test numbers at different scales.
+ // Note: changing scale changes the internal representation but not the
+ // Number's value.
+ const Number scaled_numbers[] = {
+ rep.get_number(),
+ rep.get_number().maximize_precision(),
+ rep.get_number().minimize_precision(),
+ };
+ for (const Number &num : scaled_numbers) {
+ // When numbers are exact (i.e., not truncated) they should materialize
+ // back exactly whatever the rounding mode.
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rep);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
+ }
+ }
+ }
+}
+
+// Here we test materialization of a 'Number' back to an 'FPRep' with the
+// 'TOWARDZERO' rounding mode. This rounding mode corresponds to C++ cast
+// semantics and simply discards the extra precision.
+// That is, whatever the values of the extra bits, 'Number' will materialize
+// back as 'FPRep' exactly.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundTowardZero, FPTypes) {
+ using StorageType = typename T::StorageType;
+ using Number = typename T::Number;
+ static constexpr StorageType EXTRA_BITS_MASK =
+ LIBC_NAMESPACE::mask_trailing_ones<StorageType,
+ Number::EXTRA_PRECISION>();
+ for (Sign sign : all_signs) {
+ for (FP fp : all_fp_values) {
+ const T rep = make<T>(sign, fp);
+ if (!rep.is_finite())
+ continue;
+ // Number with EXTRA_PRECISION bits.
+ Number num = rep.get_number().maximize_precision();
+
+ // Exact number converts back to rep.
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+ // Non-exact numbers convert back to rep.
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
+
+ if (rep.is_zero())
+ continue; // extra bits are only present for non-zero numbers.
+
+ ++num.significand; // Smallest extra bits value.
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
+ num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
+ }
+ }
+}
+
+// Here we test materialization of a 'Number' back to an 'FPRep' with the
+// 'AWAYZERO' rounding mode. This rounding mode will convert back to 'FPRep'
+// only if there is no extra bit set and Precision is 'EXACT', otherwise it
+// will materialize as the next representable number.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundAwayZero, FPTypes) {
+ using StorageType = typename T::StorageType;
+ using Number = typename T::Number;
+ static constexpr StorageType EXTRA_BITS_MASK =
+ LIBC_NAMESPACE::mask_trailing_ones<StorageType,
+ Number::EXTRA_PRECISION>();
+ const struct {
+ FP initial;
+ FP rounded;
+ } TESTS[] = {
+ {FP::ZERO, FP::MIN_SUBNORMAL}, //
+ {FP::MAX_SUBNORMAL, FP::MIN_NORMAL}, //
+ {FP::MAX_NORMAL, FP::INF}, //
+ {FP::INF, FP::INF}, // NOTE(review): non-finite, but get_number() is documented as finite-only - confirm
+ {FP::QUIET_NAN, FP::QUIET_NAN}, // NOTE(review): non-finite, same concern as INF
+ {FP::SIGNALING_NAN, FP::SIGNALING_NAN}, // NOTE(review): non-finite, same concern as INF
+ };
+ for (Sign sign : all_signs) {
+ for (auto tc : TESTS) {
+ const T rep = make<T>(sign, tc.initial);
+ const T rounded = make<T>(sign, tc.rounded);
+ Number num = rep.get_number().maximize_precision(); // No is_finite() filter here, unlike the other tests.
+
+ // Exact number converts back to rep.
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rep);
+ // Non-exact numbers get rounded toward infinity.
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
+
+ if (rep.is_zero())
+ continue; // extra bits are only present for non-zero numbers.
+
+ ++num.significand; // Smallest extra bits value.
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rounded);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
+ num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rounded);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
+ }
+ }
+}
+
+// Here we test materialization of a 'Number' back to an 'FPRep' with the
+// 'TONEAREST' rounding mode. This rounding mode will convert back to 'FPRep'
+// when no extra bit or only the smallest extra bit is set, and will
+// materialize as the next representable number when all extra bits are set.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundToNearest, FPTypes) {
+ using StorageType = typename T::StorageType;
+ using Number = typename T::Number;
+ static constexpr StorageType EXTRA_BITS_MASK =
+ LIBC_NAMESPACE::mask_trailing_ones<StorageType,
+ Number::EXTRA_PRECISION>();
+ const struct {
+ FP initial;
+ FP rounded;
+ } TESTS[] = {
+ {FP::ZERO, FP::MIN_SUBNORMAL}, //
+ {FP::MAX_SUBNORMAL, FP::MIN_NORMAL}, //
+ {FP::MAX_NORMAL, FP::INF}, //
+ {FP::INF, FP::INF}, // NOTE(review): non-finite, but get_number() is documented as finite-only - confirm
+ {FP::QUIET_NAN, FP::QUIET_NAN}, // NOTE(review): non-finite, same concern as INF
+ {FP::SIGNALING_NAN, FP::SIGNALING_NAN}, // NOTE(review): non-finite, same concern as INF
+ };
+ for (Sign sign : all_signs) {
+ for (auto tc : TESTS) {
+ const T rep = make<T>(sign, tc.initial);
+ const T rounded = make<T>(sign, tc.rounded);
+ Number num = rep.get_number().maximize_precision(); // No is_finite() filter here, unlike the other tests.
+
+ // Exact number converts back to rep.
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
+ // Non-exact numbers convert back to rep.
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rep);
+
+ if (rep.is_zero())
+ continue; // extra bits are only present for non-zero numbers.
+
+ ++num.significand; // Smallest extra bits value.
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rep);
+ num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rounded);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rounded);
+ }
+ }
+}
+
+// We test the materialization of the smallest non-zero 'Number'.
+TYPED_TEST(LlvmLibcFPBitsTest, SmallestNumber, FPTypes) {
+ using StorageType = typename T::StorageType;
+ using Number = typename T::Number;
+ for (Sign sign : all_signs) {
+ Number num;
+ num.sign = sign;
+ num.exponent = INT32_MIN; // Lowest possible exponent.
+ num.significand = StorageType(1); // Lowest non-zero significand.
+
+ const T zero = make<T>(sign, FP::ZERO);
+ const T min = make<T>(sign, FP::MIN_SUBNORMAL);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, zero);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, zero);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, min);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, min);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, zero);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, zero);
+ }
+}
+
+TYPED_TEST(LlvmLibcFPBitsTest, LargestNumber, FPTypes) {
+ using StorageType = typename T::StorageType;
+ using Number = typename T::Number;
+ for (Sign sign : all_signs) {
+ Number num;
+ num.sign = sign;
+ num.exponent = INT32_MAX; // Highest possible exponent.
+ num.significand = ~StorageType(0); // All significand bits set.
+
+ const T inf = make<T>(sign, FP::INF);
+ const T max = make<T>(sign, FP::MAX_NORMAL);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, max);
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, max);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, inf);
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, inf);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, inf);
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, inf);
+ }
+}
+
TEST(LlvmLibcFPBitsTest, FloatType) {
using FloatBits = FPBits<float>;
More information about the libc-commits
mailing list