[libc-commits] [libc] 53804d4 - [libc] fix strtol returning the wrong length

Michael Jones via libc-commits libc-commits at lists.llvm.org
Mon Oct 25 15:00:49 PDT 2021


Author: Michael Jones
Date: 2021-10-25T15:00:45-07:00
New Revision: 53804d4eb2866b4741ebf6c76a8bee8580dde259

URL: https://github.com/llvm/llvm-project/commit/53804d4eb2866b4741ebf6c76a8bee8580dde259
DIFF: https://github.com/llvm/llvm-project/commit/53804d4eb2866b4741ebf6c76a8bee8580dde259.diff

LOG: [libc] fix strtol returning the wrong length

Previously, strtol/ll/ul/ull would return a pointer to the end of its
parsing, regardless of if it detected a number. Now it will return a
length of 0 when it doesn't find a number.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D112176

Added: 
    

Modified: 
    libc/src/__support/str_conv_utils.h
    libc/test/src/stdlib/strtol_test.cpp
    libc/test/src/stdlib/strtoll_test.cpp
    libc/test/src/stdlib/strtoul_test.cpp
    libc/test/src/stdlib/strtoull_test.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/__support/str_conv_utils.h b/libc/src/__support/str_conv_utils.h
index fb87ca00a93c7..649e3ee9b846b 100644
--- a/libc/src/__support/str_conv_utils.h
+++ b/libc/src/__support/str_conv_utils.h
@@ -42,15 +42,23 @@ static inline bool is_hex_start(const char *__restrict src) {
 }
 
 // Takes the address of the string pointer and parses the base from the start of
-// it. This will advance the string pointer.
+// it. This function will advance |src| to the first valid digit in the inferred
+// base.
 static inline int infer_base(const char *__restrict *__restrict src) {
+  // A hexadecimal number is defined as "the prefix 0x or 0X followed by a
+  // sequence of the deimal digits and the letters a (or A) through f (or F)
+  // with values 10 through 15 respectively." (C standard 6.4.4.1)
   if (is_hex_start(*src)) {
     (*src) += 2;
     return 16;
-  } else if (**src == '0') {
-    ++(*src);
+  } // An octal number is defined as "the prefix 0 optionally followed by a
+    // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
+    // number that starts with 0, including just 0, is an octal number.
+  else if (**src == '0') {
     return 8;
-  } else {
+  } // A decimal number is defined as beginning "with a nonzero digit and
+    // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
+  else {
     return 10;
   }
 }
@@ -62,6 +70,8 @@ template <class T>
 static inline T strtointeger(const char *__restrict src,
                              char **__restrict str_end, int base) {
   unsigned long long result = 0;
+  bool is_number = false;
+  const char *original_src = src;
 
   if (base < 0 || base == 1 || base > 36) {
     errno = EINVAL; // NOLINT
@@ -97,6 +107,7 @@ static inline T strtointeger(const char *__restrict src,
     if (cur_digit >= base)
       break;
 
+    is_number = true;
     ++src;
 
     // If the number has already hit the maximum value for the current type then
@@ -122,7 +133,7 @@ static inline T strtointeger(const char *__restrict src,
   }
 
   if (str_end != nullptr)
-    *str_end = const_cast<char *>(src);
+    *str_end = const_cast<char *>(is_number ? src : original_src);
 
   if (result == ABS_MAX) {
     if (is_positive || is_unsigned)

diff  --git a/libc/test/src/stdlib/strtol_test.cpp b/libc/test/src/stdlib/strtol_test.cpp
index 1d91b8a2d5743..9372b4d7d4d02 100644
--- a/libc/test/src/stdlib/strtol_test.cpp
+++ b/libc/test/src/stdlib/strtol_test.cpp
@@ -118,7 +118,7 @@ TEST(LlvmLibcStrToLTest, MessyBaseTenDecode) {
   errno = 0;
   ASSERT_EQ(__llvm_libc::strtol(two_signs, &str_end, 10), 0l);
   ASSERT_EQ(errno, 0);
-  EXPECT_EQ(str_end - two_signs, ptr
diff _t(1));
+  EXPECT_EQ(str_end - two_signs, ptr
diff _t(0));
 
   const char *sign_before = "+2=4";
   errno = 0;
@@ -143,6 +143,18 @@ TEST(LlvmLibcStrToLTest, MessyBaseTenDecode) {
   ASSERT_EQ(__llvm_libc::strtol(all_together, &str_end, 10), -12345l);
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - all_together, ptr
diff _t(9));
+
+  const char *just_spaces = "  ";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtol(just_spaces, &str_end, 10), 0l);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_spaces, ptr
diff _t(0));
+
+  const char *just_space_and_sign = " +";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtol(just_space_and_sign, &str_end, 10), 0l);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_space_and_sign, ptr
diff _t(0));
 }
 
 static char int_to_b36_char(int input) {
@@ -322,4 +334,22 @@ TEST(LlvmLibcStrToLTest, AutomaticBaseSelection) {
   ASSERT_EQ(__llvm_libc::strtol(base_eight_with_prefix, &str_end, 0), 012345l);
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - base_eight_with_prefix, ptr
diff _t(6));
+
+  const char *just_zero = "0";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtol(just_zero, &str_end, 0), 0l);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_zero, ptr
diff _t(1));
+
+  const char *just_zero_x = "0x";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtol(just_zero_x, &str_end, 0), 0l);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_zero_x, ptr
diff _t(1));
+
+  const char *just_zero_eight = "08";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtol(just_zero_eight, &str_end, 0), 0l);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_zero_eight, ptr
diff _t(1));
 }

diff  --git a/libc/test/src/stdlib/strtoll_test.cpp b/libc/test/src/stdlib/strtoll_test.cpp
index 03e6e2dceb5df..d142b3cedf3e0 100644
--- a/libc/test/src/stdlib/strtoll_test.cpp
+++ b/libc/test/src/stdlib/strtoll_test.cpp
@@ -142,7 +142,7 @@ TEST(LlvmLibcStrToLLTest, MessyBaseTenDecode) {
   errno = 0;
   ASSERT_EQ(__llvm_libc::strtoll(two_signs, &str_end, 10), 0ll);
   ASSERT_EQ(errno, 0);
-  EXPECT_EQ(str_end - two_signs, ptr
diff _t(1));
+  EXPECT_EQ(str_end - two_signs, ptr
diff _t(0));
 
   const char *sign_before = "+2=4";
   errno = 0;
@@ -167,6 +167,18 @@ TEST(LlvmLibcStrToLLTest, MessyBaseTenDecode) {
   ASSERT_EQ(__llvm_libc::strtoll(all_together, &str_end, 10), -12345ll);
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - all_together, ptr
diff _t(9));
+
+  const char *just_spaces = "  ";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(just_spaces, &str_end, 10), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_spaces, ptr
diff _t(0));
+
+  const char *just_space_and_sign = " +";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(just_space_and_sign, &str_end, 10), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_space_and_sign, ptr
diff _t(0));
 }
 
 static char int_to_b36_char(int input) {
@@ -350,4 +362,22 @@ TEST(LlvmLibcStrToLLTest, AutomaticBaseSelection) {
             012345ll);
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - base_eight_with_prefix, ptr
diff _t(6));
+
+  const char *just_zero = "0";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(just_zero, &str_end, 0), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_zero, ptr
diff _t(1));
+
+  const char *just_zero_x = "0x";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(just_zero_x, &str_end, 0), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_zero_x, ptr
diff _t(1));
+
+  const char *just_zero_eight = "08";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(just_zero_eight, &str_end, 0), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_zero_eight, ptr
diff _t(1));
 }

diff  --git a/libc/test/src/stdlib/strtoul_test.cpp b/libc/test/src/stdlib/strtoul_test.cpp
index 07976b1f11019..c5fdb964e6331 100644
--- a/libc/test/src/stdlib/strtoul_test.cpp
+++ b/libc/test/src/stdlib/strtoul_test.cpp
@@ -110,7 +110,7 @@ TEST(LlvmLibcStrToULTest, MessyBaseTenDecode) {
   errno = 0;
   ASSERT_EQ(__llvm_libc::strtoul(two_signs, &str_end, 10), 0ul);
   ASSERT_EQ(errno, 0);
-  EXPECT_EQ(str_end - two_signs, ptr
diff _t(1));
+  EXPECT_EQ(str_end - two_signs, ptr
diff _t(0));
 
   const char *sign_before = "+2=4";
   errno = 0;
@@ -135,6 +135,18 @@ TEST(LlvmLibcStrToULTest, MessyBaseTenDecode) {
   ASSERT_EQ(__llvm_libc::strtoul(all_together, &str_end, 10), -(12345ul));
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - all_together, ptr
diff _t(9));
+
+  const char *just_spaces = "  ";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoul(just_spaces, &str_end, 10), 0ul);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_spaces, ptr
diff _t(0));
+
+  const char *just_space_and_sign = " +";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoul(just_space_and_sign, &str_end, 10), 0ul);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_space_and_sign, ptr
diff _t(0));
 }
 
 static char int_to_b36_char(int input) {
@@ -318,4 +330,22 @@ TEST(LlvmLibcStrToULTest, AutomaticBaseSelection) {
             012345ul);
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - base_eight_with_prefix, ptr
diff _t(6));
+
+  const char *just_zero = "0";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoul(just_zero, &str_end, 0), 0ul);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_zero, ptr
diff _t(1));
+
+  const char *just_zero_x = "0x";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoul(just_zero_x, &str_end, 0), 0ul);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_zero_x, ptr
diff _t(1));
+
+  const char *just_zero_eight = "08";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoul(just_zero_eight, &str_end, 0), 0ul);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_zero_eight, ptr
diff _t(1));
 }

diff  --git a/libc/test/src/stdlib/strtoull_test.cpp b/libc/test/src/stdlib/strtoull_test.cpp
index 9536da27caea5..223668cff20cc 100644
--- a/libc/test/src/stdlib/strtoull_test.cpp
+++ b/libc/test/src/stdlib/strtoull_test.cpp
@@ -118,7 +118,7 @@ TEST(LlvmLibcStrToULLTest, MessyBaseTenDecode) {
   errno = 0;
   ASSERT_EQ(__llvm_libc::strtoull(two_signs, &str_end, 10), 0ull);
   ASSERT_EQ(errno, 0);
-  EXPECT_EQ(str_end - two_signs, ptr
diff _t(1));
+  EXPECT_EQ(str_end - two_signs, ptr
diff _t(0));
 
   const char *sign_before = "+2=4";
   errno = 0;
@@ -143,6 +143,18 @@ TEST(LlvmLibcStrToULLTest, MessyBaseTenDecode) {
   ASSERT_EQ(__llvm_libc::strtoull(all_together, &str_end, 10), -(12345ull));
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - all_together, ptr
diff _t(9));
+
+  const char *just_spaces = "  ";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoull(just_spaces, &str_end, 10), 0ull);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_spaces, ptr
diff _t(0));
+
+  const char *just_space_and_sign = " +";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoull(just_space_and_sign, &str_end, 10), 0ull);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_space_and_sign, ptr
diff _t(0));
 }
 
 static char int_to_b36_char(int input) {
@@ -326,4 +338,22 @@ TEST(LlvmLibcStrToULLTest, AutomaticBaseSelection) {
             012345ull);
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - base_eight_with_prefix, ptr
diff _t(6));
+
+  const char *just_zero = "0";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoull(just_zero, &str_end, 0), 0ull);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_zero, ptr
diff _t(1));
+
+  const char *just_zero_x = "0x";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoull(just_zero_x, &str_end, 0), 0ull);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_zero_x, ptr
diff _t(1));
+
+  const char *just_zero_eight = "08";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoull(just_zero_eight, &str_end, 0), 0ull);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_zero_eight, ptr
diff _t(1));
 }


        


More information about the libc-commits mailing list