[libc-commits] [libc] 47d0c83 - [libc] fix strtof/d/ld NaN parsing

Michael Jones via libc-commits libc-commits at lists.llvm.org
Wed Nov 17 11:00:58 PST 2021


Author: Michael Jones
Date: 2021-11-17T11:00:54-08:00
New Revision: 47d0c83e1f5fb0826d265f163d00cb57615b89d4

URL: https://github.com/llvm/llvm-project/commit/47d0c83e1f5fb0826d265f163d00cb57615b89d4
DIFF: https://github.com/llvm/llvm-project/commit/47d0c83e1f5fb0826d265f163d00cb57615b89d4.diff

LOG: [libc] fix strtof/d/ld NaN parsing

Previously, strtof/d/ld would only accept a NaN with parentheses if the
contents of the parentheses were a valid number. Now they accept any
combination of letters and numbers inside the parentheses, but only a
valid number is placed in the mantissa.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D113790

Added: 
    

Modified: 
    libc/src/__support/str_to_float.h
    libc/test/src/stdlib/strtof_test.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h
index a0efe4d94cbc8..b2347a97aa8b0 100644
--- a/libc/src/__support/str_to_float.h
+++ b/libc/src/__support/str_to_float.h
@@ -775,16 +775,30 @@ static inline T strtofloatingpoint(const char *__restrict src,
       seenDigit = true;
       src += 3;
       BitsType NaNMantissa = 0;
+      // This handles the case of `NaN(n-character-sequence)`, where the
+      // n-character-sequence is made of 0 or more letters and numbers in any
+      // order.
       if (*src == '(') {
-        char *tempSrc = 0;
-        if (isdigit(*(src + 1)) || *(src + 1) == ')') {
-          NaNMantissa = strtointeger<BitsType>(src + 1, &tempSrc, 0);
-          if (*tempSrc != ')') {
-            NaNMantissa = 0;
-          } else {
-            src = tempSrc + 1;
+        const char *leftParen = src;
+        ++src;
+        while (isalnum(*src))
+          ++src;
+        if (*src == ')') {
+          ++src;
+          char *tempSrc = 0;
+          if (isdigit(*(leftParen + 1))) {
+            // This is to prevent errors when BitsType is larger than 64 bits,
+            // since strtointeger only supports up to 64 bits. This is actually
+            // more than is required by the specification, which says for the
+            // input type "NAN(n-char-sequence)" that "the meaning of
+            // the n-char sequence is implementation-defined."
+            NaNMantissa = static_cast<BitsType>(
+                strtointeger<uint64_t>(leftParen + 1, &tempSrc, 0));
+            if (*tempSrc != ')')
+              NaNMantissa = 0;
           }
-        }
+        } else
+          src = leftParen;
       }
       NaNMantissa |= fputil::FloatProperties<T>::quietNaNMask;
       if (result.getSign()) {

diff  --git a/libc/test/src/stdlib/strtof_test.cpp b/libc/test/src/stdlib/strtof_test.cpp
index e31882fb34a43..88e7623503adf 100644
--- a/libc/test/src/stdlib/strtof_test.cpp
+++ b/libc/test/src/stdlib/strtof_test.cpp
@@ -165,10 +165,39 @@ TEST_F(LlvmLibcStrToFTest, InfTests) {
   runTest("-iNfInItY", 9, 0xff800000);
 }
 
-TEST_F(LlvmLibcStrToFTest, NaNTests) {
+TEST_F(LlvmLibcStrToFTest, SimpleNaNTests) {
   runTest("NaN", 3, 0x7fc00000);
   runTest("-nAn", 4, 0xffc00000);
+}
+
+// These NaNs are of the form `NaN(n-character-sequence)` where the
+// n-character-sequence is 0 or more letters or numbers. If there is anything
+// other than a letter or a number, then the valid number is just `NaN`. If
+// the sequence is valid, then its interpretation is implementation-defined;
+// in this case it's passed to strtointeger with an automatic base, and the
+// result is put into the mantissa only if the number spans the entire
+// contents of the parentheses.
+TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesEmptyTest) {
   runTest("NaN()", 5, 0x7fc00000);
+}
+
+TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesValidNumberTests) {
   runTest("NaN(1234)", 9, 0x7fc004d2);
+  runTest("NaN(0x1234)", 11, 0x7fc01234);
+  runTest("NaN(01234)", 10, 0x7fc0029c);
+}
+
+TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesInvalidSequenceTests) {
   runTest("NaN( 1234)", 3, 0x7fc00000);
+  runTest("NaN(-1234)", 3, 0x7fc00000);
+  runTest("NaN(asd&f)", 3, 0x7fc00000);
+  runTest("NaN(123 )", 3, 0x7fc00000);
+  runTest("NaN(123+asdf)", 3, 0x7fc00000);
+  runTest("NaN(123", 3, 0x7fc00000);
+}
+
+TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesValidSequenceInvalidNumberTests) {
+  runTest("NaN(1a)", 7, 0x7fc00000);
+  runTest("NaN(asdf)", 9, 0x7fc00000);
+  runTest("NaN(1A1)", 8, 0x7fc00000);
 }


        


More information about the libc-commits mailing list