[libc-commits] [libc] 47d0c83 - [libc] fix strtof/d/ld NaN parsing
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Wed Nov 17 11:00:58 PST 2021
Author: Michael Jones
Date: 2021-11-17T11:00:54-08:00
New Revision: 47d0c83e1f5fb0826d265f163d00cb57615b89d4
URL: https://github.com/llvm/llvm-project/commit/47d0c83e1f5fb0826d265f163d00cb57615b89d4
DIFF: https://github.com/llvm/llvm-project/commit/47d0c83e1f5fb0826d265f163d00cb57615b89d4.diff
LOG: [libc] fix strtof/d/ld NaN parsing
Previously, strtof/d/ld would only accept a NaN with parentheses if the
contents of the parentheses formed a valid number. Now any combination of
letters and numbers is accepted inside the parentheses, but only valid
numbers are placed in the mantissa.
Reviewed By: sivachandra
Differential Revision: https://reviews.llvm.org/D113790
Added:
Modified:
libc/src/__support/str_to_float.h
libc/test/src/stdlib/strtof_test.cpp
Removed:
################################################################################
diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h
index a0efe4d94cbc8..b2347a97aa8b0 100644
--- a/libc/src/__support/str_to_float.h
+++ b/libc/src/__support/str_to_float.h
@@ -775,16 +775,30 @@ static inline T strtofloatingpoint(const char *__restrict src,
seenDigit = true;
src += 3;
BitsType NaNMantissa = 0;
+ // this handles the case of `NaN(n-character-sequence)`, where the
+ // n-character-sequence is made of 0 or more letters and numbers in any
+ // order.
if (*src == '(') {
- char *tempSrc = 0;
- if (isdigit(*(src + 1)) || *(src + 1) == ')') {
- NaNMantissa = strtointeger<BitsType>(src + 1, &tempSrc, 0);
- if (*tempSrc != ')') {
- NaNMantissa = 0;
- } else {
- src = tempSrc + 1;
+ const char *leftParen = src;
+ ++src;
+ while (isalnum(*src))
+ ++src;
+ if (*src == ')') {
+ ++src;
+ char *tempSrc = 0;
+ if (isdigit(*(leftParen + 1))) {
+ // This is to prevent errors when BitsType is larger than 64 bits,
+ // since strtointeger only supports up to 64 bits. This is actually
+ // more than is required by the specification, which says for the
+ // input type "NAN(n-char-sequence)" that "the meaning of
+ // the n-char sequence is implementation-defined."
+ NaNMantissa = static_cast<BitsType>(
+ strtointeger<uint64_t>(leftParen + 1, &tempSrc, 0));
+ if (*tempSrc != ')')
+ NaNMantissa = 0;
}
- }
+ } else
+ src = leftParen;
}
NaNMantissa |= fputil::FloatProperties<T>::quietNaNMask;
if (result.getSign()) {
diff --git a/libc/test/src/stdlib/strtof_test.cpp b/libc/test/src/stdlib/strtof_test.cpp
index e31882fb34a43..88e7623503adf 100644
--- a/libc/test/src/stdlib/strtof_test.cpp
+++ b/libc/test/src/stdlib/strtof_test.cpp
@@ -165,10 +165,39 @@ TEST_F(LlvmLibcStrToFTest, InfTests) {
runTest("-iNfInItY", 9, 0xff800000);
}
-TEST_F(LlvmLibcStrToFTest, NaNTests) {
+TEST_F(LlvmLibcStrToFTest, SimpleNaNTests) {
runTest("NaN", 3, 0x7fc00000);
runTest("-nAn", 4, 0xffc00000);
+}
+
+// These NaNs are of the form `NaN(n-character-sequence)` where the
+// n-character-sequence is 0 or more letters or numbers. If there is anything
+// other than a letter or a number, then the valid number is just `NaN`. If
+// the sequence is valid, then the interpretation of them is implementation
+// defined, in this case it's passed to strtoll with an automatic base, and
+// the result is put into the mantissa if it takes up the whole width of the
+// parentheses.
+TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesEmptyTest) {
runTest("NaN()", 5, 0x7fc00000);
+}
+
+TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesValidNumberTests) {
runTest("NaN(1234)", 9, 0x7fc004d2);
+ runTest("NaN(0x1234)", 11, 0x7fc01234);
+ runTest("NaN(01234)", 10, 0x7fc0029c);
+}
+
+TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesInvalidSequenceTests) {
runTest("NaN( 1234)", 3, 0x7fc00000);
+ runTest("NaN(-1234)", 3, 0x7fc00000);
+ runTest("NaN(asd&f)", 3, 0x7fc00000);
+ runTest("NaN(123 )", 3, 0x7fc00000);
+ runTest("NaN(123+asdf)", 3, 0x7fc00000);
+ runTest("NaN(123", 3, 0x7fc00000);
+}
+
+TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesValidSequenceInvalidNumberTests) {
+ runTest("NaN(1a)", 7, 0x7fc00000);
+ runTest("NaN(asdf)", 9, 0x7fc00000);
+ runTest("NaN(1A1)", 8, 0x7fc00000);
}
More information about the libc-commits mailing list