[libc-commits] [libc] 37ce734 - [libc] fix strtointeger hex prefix parsing

Michael Jones via libc-commits libc-commits at lists.llvm.org
Fri Sep 3 13:39:20 PDT 2021


Author: Michael Jones
Date: 2021-09-03T20:39:15Z
New Revision: 37ce7349f7e95ef19cd6169e70414286b33114cb

URL: https://github.com/llvm/llvm-project/commit/37ce7349f7e95ef19cd6169e70414286b33114cb
DIFF: https://github.com/llvm/llvm-project/commit/37ce7349f7e95ef19cd6169e70414286b33114cb.diff

LOG: [libc] fix strtointeger hex prefix parsing

Fix edge case where "0x" would be considered a complete hexadecimal
number for purposes of str_end. Now the hexadecimal prefix needs a valid
digit after it, else just the 0 will be counted as the number.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D109084

Added: 
    

Modified: 
    libc/src/__support/str_conv_utils.h
    libc/test/src/stdlib/strtol_test.cpp
    libc/test/src/stdlib/strtoll_test.cpp
    libc/test/src/stdlib/strtoul_test.cpp
    libc/test/src/stdlib/strtoull_test.cpp

Removed: 
    


################################################################################
diff --git a/libc/src/__support/str_conv_utils.h b/libc/src/__support/str_conv_utils.h
index adde15823ef65..fb87ca00a93c7 100644
--- a/libc/src/__support/str_conv_utils.h
+++ b/libc/src/__support/str_conv_utils.h
@@ -34,18 +34,25 @@ static inline int b36_char_to_int(char input) {
   return 0;
 }
 
+// checks if the next 3 characters of the string pointer are the start of a
+// hexadecimal number. Does not advance the string pointer.
+static inline bool is_hex_start(const char *__restrict src) {
+  return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) &&
+         b36_char_to_int(*(src + 2)) < 16;
+}
+
 // Takes the address of the string pointer and parses the base from the start of
 // it. This will advance the string pointer.
 static inline int infer_base(const char *__restrict *__restrict src) {
-  if (**src == '0') {
+  if (is_hex_start(*src)) {
+    (*src) += 2;
+    return 16;
+  } else if (**src == '0') {
     ++(*src);
-    if ((**src | 32) == 'x') {
-      ++(*src);
-      return 16;
-    }
     return 8;
+  } else {
+    return 10;
   }
-  return 10;
 }
 
 // Takes a pointer to a string, a pointer to a string pointer, and the base to
@@ -71,7 +78,7 @@ static inline T strtointeger(const char *__restrict src,
 
   if (base == 0) {
     base = infer_base(&src);
-  } else if (base == 16 && *src == '0' && (*(src + 1) | 32) == 'x') {
+  } else if (base == 16 && is_hex_start(src)) {
     src = src + 2;
   }
 

diff --git a/libc/test/src/stdlib/strtol_test.cpp b/libc/test/src/stdlib/strtol_test.cpp
index 29997639a71e4..1d91b8a2d5743 100644
--- a/libc/test/src/stdlib/strtol_test.cpp
+++ b/libc/test/src/stdlib/strtol_test.cpp
@@ -261,6 +261,38 @@ TEST(LlvmLibcStrToLTest, CleanBaseSixteenDecode) {
   ASSERT_EQ(__llvm_libc::strtol(yes_prefix, &str_end, 16), 0x456defl);
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - yes_prefix, ptrdiff_t(8));
+
+  const char *letter_after_prefix = "0xabc123";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtol(letter_after_prefix, &str_end, 16), 0xabc123l);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - letter_after_prefix, ptrdiff_t(8));
+}
+
+TEST(LlvmLibcStrToLTest, MessyBaseSixteenDecode) {
+  char *str_end = nullptr;
+
+  const char *just_prefix = "0x";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtol(just_prefix, &str_end, 16), 0l);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtol(just_prefix, &str_end, 0), 0l);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  const char *prefix_with_x_after = "0xx";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtol(prefix_with_x_after, &str_end, 16), 0l);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtol(prefix_with_x_after, &str_end, 0), 0l);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
 }
 
 TEST(LlvmLibcStrToLTest, AutomaticBaseSelection) {

diff --git a/libc/test/src/stdlib/strtoll_test.cpp b/libc/test/src/stdlib/strtoll_test.cpp
index 8e3ce85fa0a46..03e6e2dceb5df 100644
--- a/libc/test/src/stdlib/strtoll_test.cpp
+++ b/libc/test/src/stdlib/strtoll_test.cpp
@@ -287,6 +287,39 @@ TEST(LlvmLibcStrToLLTest, CleanBaseSixteenDecode) {
   ASSERT_EQ(__llvm_libc::strtoll(yes_prefix, &str_end, 16), 0x456defll);
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - yes_prefix, ptrdiff_t(8));
+
+  const char *letter_after_prefix = "0xabc123";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(letter_after_prefix, &str_end, 16),
+            0xabc123ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - letter_after_prefix, ptrdiff_t(8));
+}
+
+TEST(LlvmLibcStrToLLTest, MessyBaseSixteenDecode) {
+  char *str_end = nullptr;
+
+  const char *just_prefix = "0x";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(just_prefix, &str_end, 16), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(just_prefix, &str_end, 0), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  const char *prefix_with_x_after = "0xx";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(prefix_with_x_after, &str_end, 16), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(prefix_with_x_after, &str_end, 0), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
 }
 
 TEST(LlvmLibcStrToLLTest, AutomaticBaseSelection) {

diff --git a/libc/test/src/stdlib/strtoul_test.cpp b/libc/test/src/stdlib/strtoul_test.cpp
index 7b08a399bbfb0..07976b1f11019 100644
--- a/libc/test/src/stdlib/strtoul_test.cpp
+++ b/libc/test/src/stdlib/strtoul_test.cpp
@@ -255,6 +255,39 @@ TEST(LlvmLibcStrToULTest, CleanBaseSixteenDecode) {
   ASSERT_EQ(__llvm_libc::strtoul(yes_prefix, &str_end, 16), 0x456deful);
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - yes_prefix, ptrdiff_t(8));
+
+  const char *letter_after_prefix = "0xabc123";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoul(letter_after_prefix, &str_end, 16),
+            0xabc123ul);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - letter_after_prefix, ptrdiff_t(8));
+}
+
+TEST(LlvmLibcStrToULTest, MessyBaseSixteenDecode) {
+  char *str_end = nullptr;
+
+  const char *just_prefix = "0x";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoul(just_prefix, &str_end, 16), 0ul);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoul(just_prefix, &str_end, 0), 0ul);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  const char *prefix_with_x_after = "0xx";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoul(prefix_with_x_after, &str_end, 16), 0ul);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoul(prefix_with_x_after, &str_end, 0), 0ul);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
 }
 
 TEST(LlvmLibcStrToULTest, AutomaticBaseSelection) {

diff --git a/libc/test/src/stdlib/strtoull_test.cpp b/libc/test/src/stdlib/strtoull_test.cpp
index 8a2ca11dcd556..9536da27caea5 100644
--- a/libc/test/src/stdlib/strtoull_test.cpp
+++ b/libc/test/src/stdlib/strtoull_test.cpp
@@ -263,6 +263,39 @@ TEST(LlvmLibcStrToULLTest, CleanBaseSixteenDecode) {
   ASSERT_EQ(__llvm_libc::strtoull(yes_prefix, &str_end, 16), 0x456defull);
   ASSERT_EQ(errno, 0);
   EXPECT_EQ(str_end - yes_prefix, ptrdiff_t(8));
+
+  const char *letter_after_prefix = "0xabc123";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoull(letter_after_prefix, &str_end, 16),
+            0xabc123ull);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - letter_after_prefix, ptrdiff_t(8));
+}
+
+TEST(LlvmLibcStrToULLTest, MessyBaseSixteenDecode) {
+  char *str_end = nullptr;
+
+  const char *just_prefix = "0x";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoull(just_prefix, &str_end, 16), 0ull);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoull(just_prefix, &str_end, 0), 0ull);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - just_prefix, ptrdiff_t(1));
+
+  const char *prefix_with_x_after = "0xx";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoull(prefix_with_x_after, &str_end, 16), 0ull);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
+
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoull(prefix_with_x_after, &str_end, 0), 0ull);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - prefix_with_x_after, ptrdiff_t(1));
 }
 
 TEST(LlvmLibcStrToULLTest, AutomaticBaseSelection) {


        


More information about the libc-commits mailing list