[libc-commits] [libc] [libc][NFC] Reuse `FloatProperties` constants instead of creating new ones (PR #75187)
Guillaume Chatelet via libc-commits
libc-commits at lists.llvm.org
Tue Dec 12 06:46:48 PST 2023
https://github.com/gchatelet updated https://github.com/llvm/llvm-project/pull/75187
From 9ad65f3d639ef674036c9445f87905ba7760da80 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Tue, 12 Dec 2023 14:07:53 +0000
Subject: [PATCH 1/2] [libc][NFC] Various simplifications
Also exposes a bit more of FloatProperties and adds documentation.
This should have been several patches but I was lazy.
---
libc/src/__support/FPUtil/FloatProperties.h | 36 +++++++------
.../__support/FPUtil/x86_64/LongDoubleBits.h | 18 +------
libc/src/__support/float_to_string.h | 9 ++--
libc/src/__support/str_to_float.h | 52 ++++++++++---------
4 files changed, 53 insertions(+), 62 deletions(-)
diff --git a/libc/src/__support/FPUtil/FloatProperties.h b/libc/src/__support/FPUtil/FloatProperties.h
index 7f396a649e4f58..b8ca6e5f2c8a8a 100644
--- a/libc/src/__support/FPUtil/FloatProperties.h
+++ b/libc/src/__support/FPUtil/FloatProperties.h
@@ -87,49 +87,55 @@ template <FPType fp_type>
struct FPProperties : public internal::FPBaseProperties<fp_type> {
private:
using UP = internal::FPBaseProperties<fp_type>;
- using UP::EXP_BITS;
- using UP::SIG_BITS;
- using UP::TOTAL_BITS;
+ // The number of bits to represent sign. For documentation purpose, always 1.
+ LIBC_INLINE_VAR static constexpr int SIGN_BITS = 1;
+ using UP::EXP_BITS; // The number of bits for the *exponent* part
+ using UP::SIG_BITS; // The number of bits for the *significand* part
+ using UP::TOTAL_BITS; // For convenience, the sum of `SIG_BITS`, `EXP_BITS`,
+ // and `SIGN_BITS`.
+ static_assert(SIGN_BITS + EXP_BITS + SIG_BITS == TOTAL_BITS);
public:
using UIntType = typename UP::UIntType;
-private:
- LIBC_INLINE_VAR static constexpr int STORAGE_BITS =
+ // The number of bits in UIntType.
+ LIBC_INLINE_VAR static constexpr int UINTTYPE_BITS =
sizeof(UIntType) * CHAR_BIT;
- static_assert(STORAGE_BITS >= TOTAL_BITS);
-
- // The number of bits to represent sign.
- // For documentation purpose, always 1.
- LIBC_INLINE_VAR static constexpr int SIGN_BITS = 1;
- static_assert(SIGN_BITS + EXP_BITS + SIG_BITS == TOTAL_BITS);
+ static_assert(UINTTYPE_BITS >= TOTAL_BITS);
+private:
// The exponent bias. Always positive.
LIBC_INLINE_VAR static constexpr int32_t EXP_BIAS =
(1U << (EXP_BITS - 1U)) - 1U;
static_assert(EXP_BIAS > 0);
- // Shifts
+ // The shift amount to get the *significand* part to the least significant
+ // bit. Always `0` but kept for consistency.
LIBC_INLINE_VAR static constexpr int SIG_MASK_SHIFT = 0;
+ // The shift amount to get the *exponent* part to the least significant bit.
LIBC_INLINE_VAR static constexpr int EXP_MASK_SHIFT = SIG_BITS;
+ // The shift amount to get the *sign* part to the least significant bit.
LIBC_INLINE_VAR static constexpr int SIGN_MASK_SHIFT = SIG_BITS + EXP_BITS;
- // Masks
+ // The bit pattern that keeps only the *significand* part.
LIBC_INLINE_VAR static constexpr UIntType SIG_MASK =
mask_trailing_ones<UIntType, SIG_BITS>() << SIG_MASK_SHIFT;
+ // The bit pattern that keeps only the *exponent* part.
LIBC_INLINE_VAR static constexpr UIntType EXP_MASK =
mask_trailing_ones<UIntType, EXP_BITS>() << EXP_MASK_SHIFT;
public:
+ // The bit pattern that keeps only the *sign* part.
LIBC_INLINE_VAR static constexpr UIntType SIGN_MASK =
mask_trailing_ones<UIntType, SIGN_BITS>() << SIGN_MASK_SHIFT;
-
-private:
+ // The bit pattern that keeps only the *sign + exponent + significand* part.
LIBC_INLINE_VAR static constexpr UIntType FP_MASK =
mask_trailing_ones<UIntType, TOTAL_BITS>();
+
static_assert((SIG_MASK & EXP_MASK & SIGN_MASK) == 0, "masks disjoint");
static_assert((SIG_MASK | EXP_MASK | SIGN_MASK) == FP_MASK, "masks cover");
+private:
LIBC_INLINE static constexpr UIntType bit_at(int position) {
return UIntType(1) << position;
}
diff --git a/libc/src/__support/FPUtil/x86_64/LongDoubleBits.h b/libc/src/__support/FPUtil/x86_64/LongDoubleBits.h
index 60953313a695c5..f1ef928f230819 100644
--- a/libc/src/__support/FPUtil/x86_64/LongDoubleBits.h
+++ b/libc/src/__support/FPUtil/x86_64/LongDoubleBits.h
@@ -26,18 +26,6 @@
namespace LIBC_NAMESPACE {
namespace fputil {
-template <unsigned Width> struct Padding;
-
-// i386 padding.
-template <> struct Padding<4> {
- static constexpr unsigned VALUE = 16;
-};
-
-// x86_64 padding.
-template <> struct Padding<8> {
- static constexpr unsigned VALUE = 48;
-};
-
template <> struct FPBits<long double> {
using UIntType = UInt128;
@@ -129,11 +117,7 @@ template <> struct FPBits<long double> {
LIBC_INLINE constexpr UIntType uintval() {
// We zero the padding bits as they can contain garbage.
- constexpr UIntType MASK =
- (UIntType(1) << (sizeof(long double) * 8 -
- Padding<sizeof(uintptr_t)>::VALUE)) -
- 1;
- return bits & MASK;
+ return bits & FloatProp::FP_MASK;
}
LIBC_INLINE constexpr long double get_val() const {
diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h
index 34c0c0ceef286d..be105830a91ac1 100644
--- a/libc/src/__support/float_to_string.h
+++ b/libc/src/__support/float_to_string.h
@@ -105,7 +105,7 @@ namespace LIBC_NAMESPACE {
using BlockInt = uint32_t;
constexpr uint32_t BLOCK_SIZE = 9;
-using MantissaInt = fputil::FPBits<long double>::UIntType;
+using FloatProp = fputil::FloatProperties<long double>;
// Larger numbers prefer a slightly larger constant than is used for the smaller
// numbers.
@@ -382,11 +382,10 @@ LIBC_INLINE uint32_t fast_uint_mod_1e9(const cpp::UInt<MID_INT_SIZE> &val) {
(1000000000 * shifted));
}
-LIBC_INLINE uint32_t mul_shift_mod_1e9(const MantissaInt mantissa,
+LIBC_INLINE uint32_t mul_shift_mod_1e9(const FloatProp::UIntType mantissa,
const cpp::UInt<MID_INT_SIZE> &large,
const int32_t shift_amount) {
- constexpr size_t MANT_INT_SIZE = sizeof(MantissaInt) * 8;
- cpp::UInt<MID_INT_SIZE + MANT_INT_SIZE> val(large);
+ cpp::UInt<MID_INT_SIZE + FloatProp::UINTTYPE_BITS> val(large);
val = (val * mantissa) >> shift_amount;
return static_cast<uint32_t>(
val.div_uint32_times_pow_2(1000000000, 0).value());
@@ -415,7 +414,7 @@ class FloatToString {
fputil::FPBits<T> float_bits;
bool is_negative;
int exponent;
- MantissaInt mantissa;
+ FloatProp::UIntType mantissa;
static constexpr int MANT_WIDTH = fputil::MantissaWidth<T>::VALUE;
static constexpr int EXP_BIAS = fputil::FPBits<T>::EXPONENT_BIAS;
diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h
index 79d35682d0b714..2a6f15c018f1ec 100644
--- a/libc/src/__support/str_to_float.h
+++ b/libc/src/__support/str_to_float.h
@@ -77,8 +77,6 @@ eisel_lemire(ExpandedFloat<T> init_num,
UIntType mantissa = init_num.mantissa;
int32_t exp10 = init_num.exponent;
- constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8;
-
if (sizeof(T) > 8) { // This algorithm cannot handle anything longer than a
// double, so we skip straight to the fallback.
return cpp::nullopt;
@@ -94,8 +92,8 @@ eisel_lemire(ExpandedFloat<T> init_num,
uint32_t clz = cpp::countl_zero<UIntType>(mantissa);
mantissa <<= clz;
- int32_t exp2 =
- exp10_to_exp2(exp10) + BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz;
+ int32_t exp2 = exp10_to_exp2(exp10) + FloatProp::UINTTYPE_BITS +
+ FloatProp::EXPONENT_BIAS - clz;
// Multiplication
const uint64_t *power_of_ten =
@@ -112,7 +110,9 @@ eisel_lemire(ExpandedFloat<T> init_num,
// accuracy, and the most significant bit is ignored.) = 9 bits. Similarly,
// it's 6 bits for floats in this case.
const uint64_t halfway_constant =
- (uint64_t(1) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3))) - 1;
+ (uint64_t(1) << (FloatProp::UINTTYPE_BITS -
+ (FloatProp::MANTISSA_WIDTH + 3))) -
+ 1;
if ((high64(first_approx) & halfway_constant) == halfway_constant &&
low64(first_approx) + mantissa < mantissa) {
UInt128 low_bits =
@@ -131,11 +131,11 @@ eisel_lemire(ExpandedFloat<T> init_num,
}
// Shifting to 54 bits for doubles and 25 bits for floats
- UIntType msb =
- static_cast<UIntType>(high64(final_approx) >> (BITS_IN_MANTISSA - 1));
+ UIntType msb = static_cast<UIntType>(high64(final_approx) >>
+ (FloatProp::UINTTYPE_BITS - 1));
UIntType final_mantissa = static_cast<UIntType>(
high64(final_approx) >>
- (msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3)));
+ (msb + FloatProp::UINTTYPE_BITS - (FloatProp::MANTISSA_WIDTH + 3)));
exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb
if (round == RoundDirection::Nearest) {
@@ -190,8 +190,6 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
UIntType mantissa = init_num.mantissa;
int32_t exp10 = init_num.exponent;
- constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8;
-
// Exp10 Range
// This doesn't reach very far into the range for long doubles, since it's
// sized for doubles and their 11 exponent bits, and not for long doubles and
@@ -211,8 +209,8 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
uint32_t clz = cpp::countl_zero<UIntType>(mantissa);
mantissa <<= clz;
- int32_t exp2 =
- exp10_to_exp2(exp10) + BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz;
+ int32_t exp2 = exp10_to_exp2(exp10) + FloatProp::UINTTYPE_BITS +
+ FloatProp::EXPONENT_BIAS - clz;
// Multiplication
const uint64_t *power_of_ten =
@@ -249,7 +247,9 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
// accuracy, and the most significant bit is ignored.) = 61 bits. Similarly,
// it's 12 bits for 128 bit floats in this case.
constexpr UInt128 HALFWAY_CONSTANT =
- (UInt128(1) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3))) - 1;
+ (UInt128(1) << (FloatProp::UINTTYPE_BITS -
+ (FloatProp::MANTISSA_WIDTH + 3))) -
+ 1;
if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT &&
final_approx_lower + mantissa < mantissa) {
@@ -257,11 +257,11 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
}
// Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats
- uint32_t msb =
- static_cast<uint32_t>(final_approx_upper >> (BITS_IN_MANTISSA - 1));
+ uint32_t msb = static_cast<uint32_t>(final_approx_upper >>
+ (FloatProp::UINTTYPE_BITS - 1));
UIntType final_mantissa =
final_approx_upper >>
- (msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3));
+ (msb + FloatProp::UINTTYPE_BITS - (FloatProp::MANTISSA_WIDTH + 3));
exp2 -= static_cast<uint32_t>(1 ^ msb); // same as !msb
if (round == RoundDirection::Nearest) {
@@ -622,9 +622,10 @@ template <> constexpr int32_t get_upper_bound<double>() { return 309; }
// other out, and subnormal numbers allow for the result to be at the very low
// end of the final mantissa.
template <typename T> constexpr int32_t get_lower_bound() {
- return -((fputil::FloatProperties<T>::EXPONENT_BIAS +
- static_cast<int32_t>(fputil::FloatProperties<T>::MANTISSA_WIDTH +
- (sizeof(T) * 8))) /
+ using FloatProp = typename fputil::FloatProperties<T>;
+ return -((FloatProp::EXPONENT_BIAS +
+ static_cast<int32_t>(FloatProp::MANTISSA_WIDTH +
+ FloatProp::UINTTYPE_BITS)) /
3);
}
@@ -733,7 +734,6 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
// This is the number of leading zeroes a properly normalized float of type T
// should have.
- constexpr int32_t NUMBITS = sizeof(UIntType) * 8;
constexpr int32_t INF_EXP = (1 << FloatProp::EXPONENT_WIDTH) - 1;
// Normalization step 1: Bring the leading bit to the highest bit of UIntType.
@@ -743,8 +743,9 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
// Keep exp2 representing the exponent of the lowest bit of UIntType.
exp2 -= amount_to_shift_left;
- // biasedExponent represents the biased exponent of the most significant bit.
- int32_t biased_exponent = exp2 + NUMBITS + FPBits::EXPONENT_BIAS - 1;
+ // biased_exponent represents the biased exponent of the most significant bit.
+ int32_t biased_exponent =
+ exp2 + FloatProp::UINTTYPE_BITS + FPBits::EXPONENT_BIAS - 1;
// Handle numbers that're too large and get squashed to inf
if (biased_exponent >= INF_EXP) {
@@ -754,14 +755,15 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
return output;
}
- uint32_t amount_to_shift_right = NUMBITS - FloatProp::MANTISSA_WIDTH - 1;
+ uint32_t amount_to_shift_right =
+ FloatProp::UINTTYPE_BITS - FloatProp::MANTISSA_WIDTH - 1;
// Handle subnormals.
if (biased_exponent <= 0) {
amount_to_shift_right += 1 - biased_exponent;
biased_exponent = 0;
- if (amount_to_shift_right > NUMBITS) {
+ if (amount_to_shift_right > FloatProp::UINTTYPE_BITS) {
// Return 0 if the exponent is too small.
output.num = {0, 0};
output.error = ERANGE;
@@ -774,7 +776,7 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
bool round_bit = static_cast<bool>(mantissa & round_bit_mask);
bool sticky_bit = static_cast<bool>(mantissa & sticky_mask) || truncated;
- if (amount_to_shift_right < NUMBITS) {
+ if (amount_to_shift_right < FloatProp::UINTTYPE_BITS) {
// Shift the mantissa and clear the implicit bit.
mantissa >>= amount_to_shift_right;
mantissa &= FloatProp::MANTISSA_MASK;
From 875b2dd5e3da14b9b956d5bc96187d94c5dc9838 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Tue, 12 Dec 2023 14:46:31 +0000
Subject: [PATCH 2/2] Document UIntType as well
---
libc/src/__support/FPUtil/FloatProperties.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/libc/src/__support/FPUtil/FloatProperties.h b/libc/src/__support/FPUtil/FloatProperties.h
index b8ca6e5f2c8a8a..3f7dbdc5af3425 100644
--- a/libc/src/__support/FPUtil/FloatProperties.h
+++ b/libc/src/__support/FPUtil/FloatProperties.h
@@ -96,6 +96,8 @@ struct FPProperties : public internal::FPBaseProperties<fp_type> {
static_assert(SIGN_BITS + EXP_BITS + SIG_BITS == TOTAL_BITS);
public:
+ // An unsigned integer that is wide enough to contain all of the floating
+ // point bits.
using UIntType = typename UP::UIntType;
// The number of bits in UIntType.
More information about the libc-commits mailing list