[libc-commits] [libc] 201cc2d - [libc] Correct rounding for hexadecimalStringToFloat with long inputs.
Tue Ly via libc-commits
libc-commits at lists.llvm.org
Tue Nov 16 11:31:01 PST 2021
Author: Tue Ly
Date: 2021-11-16T14:29:39-05:00
New Revision: 201cc2d885287c91127d23ffe6f30472e974b2dc
URL: https://github.com/llvm/llvm-project/commit/201cc2d885287c91127d23ffe6f30472e974b2dc
DIFF: https://github.com/llvm/llvm-project/commit/201cc2d885287c91127d23ffe6f30472e974b2dc.diff
LOG: [libc] Correct rounding for hexadecimalStringToFloat with long inputs.
Update binaryExpToFloat so that it rounds correctly for long inputs when converting hexadecimal strings to floating-point values.
Differential Revision: https://reviews.llvm.org/D113815
Added:
Modified:
libc/src/__support/str_to_float.h
libc/test/src/stdlib/strtof_test.cpp
Removed:
################################################################################
diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h
index 3f4a050fcb480..a0efe4d94cbc8 100644
--- a/libc/src/__support/str_to_float.h
+++ b/libc/src/__support/str_to_float.h
@@ -20,26 +20,6 @@
namespace __llvm_libc {
namespace internal {
-// Shifts right and rounds according to the following rules:
-// 1) If the part being cut off is more than 2^(amountToShift - 1) then round
-// up
-// 2) If it is less than that number then round down
-// 3) If it is exactly that number, then round so that the final number will be
-// even
-template <class T>
-static inline T shiftRightAndRound(T numToShift, unsigned int amountToShift) {
- T result = numToShift >> amountToShift;
- T truncated = numToShift & ((1 << amountToShift) - 1);
-
- if (truncated < (1 << (amountToShift - 1))) {
- return result;
- } else if (truncated > (1 << (amountToShift - 1))) {
- return result + 1;
- } else {
- return result + (result & 1); // This rounds towards even.
- }
-}
-
template <class T> uint32_t inline leadingZeroes(T inputNumber) {
// TODO(michaelrj): investigate the portability of using something like
// __builtin_clz for specific types.
@@ -461,72 +441,85 @@ decimalExpToFloat(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp10,
template <class T>
static inline void
binaryExpToFloat(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp2,
+ bool truncated,
typename fputil::FPBits<T>::UIntType *outputMantissa,
uint32_t *outputExp2) {
using BitsType = typename fputil::FPBits<T>::UIntType;
// This is the number of leading zeroes a properly normalized float of type T
// should have.
- constexpr int32_t NORMALIZED_LEADING_ZEROES =
- (sizeof(BitsType) * 8) - fputil::FloatProperties<T>::mantissaWidth - 1;
- constexpr BitsType OVERFLOWED_MANTISSA =
- BitsType(1) << (fputil::FloatProperties<T>::mantissaWidth + 1);
+ constexpr int32_t NUMBITS = sizeof(BitsType) * 8;
+ constexpr int32_t INF_EXP =
+ (1 << fputil::FloatProperties<T>::exponentWidth) - 1;
- // Normalization
- int32_t amountToShift =
- NORMALIZED_LEADING_ZEROES -
- static_cast<int32_t>(leadingZeroes<BitsType>(mantissa));
- if (amountToShift < 0) {
- mantissa <<= -amountToShift;
- } else {
- mantissa = shiftRightAndRound(mantissa, amountToShift);
- if (mantissa == OVERFLOWED_MANTISSA) {
- mantissa >>= 1;
- exp2 += 1;
- }
+ // Normalization step 1: Bring the leading bit to the highest bit of BitsType.
+ uint32_t amountToShiftLeft = leadingZeroes<BitsType>(mantissa);
+ mantissa <<= amountToShiftLeft;
+
+ // Keep exp2 representing the exponent of the lowest bit of BitsType.
+ exp2 -= amountToShiftLeft;
+
+ // biasedExponent represents the biased exponent of the most significant bit.
+ int32_t biasedExponent = exp2 + NUMBITS + fputil::FPBits<T>::exponentBias - 1;
+
+ // Handle numbers that're too large and get squashed to inf
+ if (biasedExponent >= INF_EXP) {
+ // This indicates an overflow, so we make the result INF and set errno.
+ *outputExp2 = (1 << fputil::FloatProperties<T>::exponentWidth) - 1;
+ *outputMantissa = 0;
+ errno = ERANGE; // NOLINT
+ return;
}
- exp2 += amountToShift;
- // Account for the fact that the mantissa represented an integer
- // previously, but now represents the fractional part of a normalized
- // number.
- exp2 += fputil::FloatProperties<T>::mantissaWidth;
+ uint32_t amountToShiftRight =
+ NUMBITS - fputil::FloatProperties<T>::mantissaWidth - 1;
- int32_t biasedExponent = exp2 + fputil::FPBits<T>::exponentBias;
- // handle subnormals
+ // Handle subnormals.
if (biasedExponent <= 0) {
+ amountToShiftRight += 1 - biasedExponent;
+ biasedExponent = 0;
- // the most mantissa is currently normalized, meaning that the msb is
- // one bit left of where the decimal point should go.
- amountToShift = 1;
- BitsType mantissaCopy = mantissa >> 1;
- while (biasedExponent < 0 && mantissaCopy > 0) {
- mantissaCopy = mantissaCopy >> 1;
- ++amountToShift;
- ++biasedExponent;
- }
- // If we cut off any bits to fit this number into a subnormal, then it's
- // out of range for this size of float.
- if ((mantissa & ((1 << amountToShift) - 1)) > 0) {
+ if (amountToShiftRight > NUMBITS) {
+ // Return 0 if the exponent is too small.
+ *outputMantissa = 0;
+ *outputExp2 = 0;
errno = ERANGE; // NOLINT
- }
- mantissa = shiftRightAndRound(mantissa, amountToShift);
- if (mantissa == OVERFLOWED_MANTISSA) {
- mantissa >>= 1;
- exp2 += 1;
- } else if (mantissa == 0) {
- biasedExponent = 0;
+ return;
}
}
- // handle numbers that're too large and get squashed to inf
- else if (biasedExponent >
- (1 << fputil::FloatProperties<T>::exponentWidth) - 1) {
- // This indicates an overflow, so we make the result INF and set errno.
- biasedExponent = (1 << fputil::FloatProperties<T>::exponentWidth) - 1;
+
+ BitsType roundBitMask = BitsType(1) << (amountToShiftRight - 1);
+ BitsType stickyMask = roundBitMask - 1;
+ bool roundBit = mantissa & roundBitMask;
+ bool stickyBit = static_cast<bool>(mantissa & stickyMask) || truncated;
+
+ if (amountToShiftRight < NUMBITS) {
+ // Shift the mantissa and clear the implicit bit.
+ mantissa >>= amountToShiftRight;
+ mantissa &= fputil::FloatProperties<T>::mantissaMask;
+ } else {
mantissa = 0;
+ }
+ bool leastSignificantBit = mantissa & BitsType(1);
+ // Perform rounding-to-nearest, tie-to-even.
+ if (roundBit && (leastSignificantBit || stickyBit)) {
+ ++mantissa;
+ }
+
+ if (mantissa > fputil::FloatProperties<T>::mantissaMask) {
+ // Rounding causes the exponent to increase.
+ ++biasedExponent;
+
+ if (biasedExponent == INF_EXP) {
+ errno = ERANGE; // NOLINT
+ }
+ }
+
+ if (biasedExponent == 0) {
errno = ERANGE; // NOLINT
}
- *outputMantissa = mantissa;
+
+ *outputMantissa = mantissa & fputil::FloatProperties<T>::mantissaMask;
*outputExp2 = biasedExponent;
}
@@ -666,11 +659,10 @@ hexadecimalStringToFloat(const char *__restrict src, const char DECIMAL_POINT,
while (true) {
if (isalnum(*src)) {
uint32_t digit = b36_char_to_int(*src);
- if (digit >= BASE) {
- seenDigit = false;
+ if (digit < BASE)
+ seenDigit = true;
+ else
break;
- }
- seenDigit = true;
if (mantissa < BITSTYPE_MAX_DIV_BY_BASE) {
mantissa = (mantissa * BASE) + digit;
@@ -722,7 +714,8 @@ hexadecimalStringToFloat(const char *__restrict src, const char DECIMAL_POINT,
*outputMantissa = 0;
*outputExponent = 0;
} else {
- binaryExpToFloat<T>(mantissa, exponent, outputMantissa, outputExponent);
+ binaryExpToFloat<T>(mantissa, exponent, truncated, outputMantissa,
+ outputExponent);
}
return true;
}
diff --git a/libc/test/src/stdlib/strtof_test.cpp b/libc/test/src/stdlib/strtof_test.cpp
index 664133c1991fc..e31882fb34a43 100644
--- a/libc/test/src/stdlib/strtof_test.cpp
+++ b/libc/test/src/stdlib/strtof_test.cpp
@@ -102,10 +102,10 @@ TEST_F(LlvmLibcStrToFTest, BasicHexadecimalTests) {
}
TEST_F(LlvmLibcStrToFTest, HexadecimalSubnormalTests) {
- runTest("0x0.0000000000000000000000000000000002", 38, 0x4000);
+ runTest("0x0.0000000000000000000000000000000002", 38, 0x4000, ERANGE);
// This is the largest subnormal number as represented in hex
- runTest("0x0.00000000000000000000000000000003fffff8", 42, 0x7fffff);
+ runTest("0x0.00000000000000000000000000000003fffff8", 42, 0x7fffff, ERANGE);
}
TEST_F(LlvmLibcStrToFTest, HexadecimalSubnormalRoundingTests) {
@@ -120,6 +120,14 @@ TEST_F(LlvmLibcStrToFTest, HexadecimalSubnormalRoundingTests) {
// These check that we're rounding to even properly
runTest("0x0.0000000000000000000000000000000000000b", 42, 0x00000001, ERANGE);
runTest("0x0.0000000000000000000000000000000000000c", 42, 0x00000002, ERANGE);
+
+ // These check that we're rounding to even properly even when the input bits
+ // are longer than the bit fields can contain.
+ runTest("0x1.000000000000000000000p-150", 30, 0x00000000, ERANGE);
+ runTest("0x1.000010000000000001000p-150", 30, 0x00000001, ERANGE);
+ runTest("0x1.000100000000000001000p-134", 30, 0x00008001, ERANGE);
+ runTest("0x1.FFFFFC000000000001000p-127", 30, 0x007FFFFF, ERANGE);
+ runTest("0x1.FFFFFE000000000000000p-127", 30, 0x00800000);
}
TEST_F(LlvmLibcStrToFTest, HexadecimalNormalRoundingTests) {
@@ -131,6 +139,9 @@ TEST_F(LlvmLibcStrToFTest, HexadecimalNormalRoundingTests) {
runTest("0x123456600", 11, 0x4f91a2b3);
// This gets rounded up to even
runTest("0x123456700", 11, 0x4f91a2b4);
+ // Correct rounding for long input
+ runTest("0x1.000001000000000000000", 25, 0x3f800000);
+ runTest("0x1.000001000000000000100", 25, 0x3f800001);
}
TEST_F(LlvmLibcStrToFTest, HexadecimalsWithRoundingProblems) {
More information about the libc-commits mailing list