[libc-commits] [libc] [libc] Scanf shouldn't match just "0x" for hex int (PR #112440)

via libc-commits libc-commits at lists.llvm.org
Tue Oct 15 14:45:17 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libc

Author: Michael Jones (michaelrj-google)

<details>
<summary>Changes</summary>

Scanf parsing reads the longest possibly valid prefix for a given
conversion. Then, it performs the conversion on that string. In the case
of "0xZ" with a hex conversion (either "%x" or "%i") the longest
possibly valid prefix is "0x", which makes it the "input item" (per the
standard). The sequence "0x" is not a "matching sequence" for a hex
conversion, meaning it results in a matching failure, and parsing ends.
This is because to know that there's no valid digit after "0x" it reads
the 'Z', but it can only put back one character (the 'Z') leaving it
with consuming an invalid sequence.

(inspired by a thread on the libc-coord mailing list, see 7.32.6.2 in
the standard for more details.)


---
Full diff: https://github.com/llvm/llvm-project/pull/112440.diff


2 Files Affected:

- (modified) libc/src/stdio/scanf_core/int_converter.cpp (+17-4) 
- (modified) libc/test/src/stdio/sscanf_test.cpp (+26-10) 


``````````diff
diff --git a/libc/src/stdio/scanf_core/int_converter.cpp b/libc/src/stdio/scanf_core/int_converter.cpp
index 136db2a3773e11..cc3ab9da0bbd4f 100644
--- a/libc/src/stdio/scanf_core/int_converter.cpp
+++ b/libc/src/stdio/scanf_core/int_converter.cpp
@@ -124,13 +124,25 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
 
       if (to_lower(cur_char) == 'x') {
         // This is a valid hex prefix.
+
+        is_number = false;
+        // A valid hex prefix is not necessarily a valid number. For the
+        // conversion to be valid it needs to use all of the characters it
+        // consumes. From the standard:
+        // 7.23.6.2 paragraph 9: "An input item is defined as the longest
+        // sequence of input characters which does not exceed any specified
+        // field width and which is, or is a prefix of, a matching input
+        // sequence."
+        // 7.23.6.2 paragraph 10: "If the input item is not a matching sequence,
+        // the execution of the directive fails: this condition is a matching
+        // failure"
         base = 16;
         if (max_width > 1) {
           --max_width;
           cur_char = reader->getc();
         } else {
-          write_int_with_length(0, to_conv);
-          return READ_OK;
+          // write_int_with_length(0, to_conv);
+          return MATCHING_FAILURE;
         }
 
       } else {
@@ -198,6 +210,9 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
   // last one back.
   reader->ungetc(cur_char);
 
+  if (!is_number)
+    return MATCHING_FAILURE;
+
   if (has_overflow) {
     write_int_with_length(MAX, to_conv);
   } else {
@@ -207,8 +222,6 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
     write_int_with_length(result, to_conv);
   }
 
-  if (!is_number)
-    return MATCHING_FAILURE;
   return READ_OK;
 }
 
diff --git a/libc/test/src/stdio/sscanf_test.cpp b/libc/test/src/stdio/sscanf_test.cpp
index 33bb0acba3e662..18addb632067c9 100644
--- a/libc/test/src/stdio/sscanf_test.cpp
+++ b/libc/test/src/stdio/sscanf_test.cpp
@@ -177,13 +177,25 @@ TEST(LlvmLibcSScanfTest, IntConvMaxLengthTests) {
   EXPECT_EQ(ret_val, 1);
   EXPECT_EQ(result, 0);
 
+  result = -999;
+
+  // 0x is a valid prefix, but not a valid number. This should be a matching
+  // failure and should not modify the values.
   ret_val = LIBC_NAMESPACE::sscanf("0x1", "%2i", &result);
-  EXPECT_EQ(ret_val, 1);
-  EXPECT_EQ(result, 0);
+  EXPECT_EQ(ret_val, 0);
+  EXPECT_EQ(result, -999);
 
   ret_val = LIBC_NAMESPACE::sscanf("-0x1", "%3i", &result);
+  EXPECT_EQ(ret_val, 0);
+  EXPECT_EQ(result, -999);
+
+  ret_val = LIBC_NAMESPACE::sscanf("0x1", "%3i", &result);
   EXPECT_EQ(ret_val, 1);
-  EXPECT_EQ(result, 0);
+  EXPECT_EQ(result, 1);
+
+  ret_val = LIBC_NAMESPACE::sscanf("-0x1", "%4i", &result);
+  EXPECT_EQ(ret_val, 1);
+  EXPECT_EQ(result, -1);
 
   ret_val = LIBC_NAMESPACE::sscanf("-0x123", "%4i", &result);
   EXPECT_EQ(ret_val, 1);
@@ -212,7 +224,7 @@ TEST(LlvmLibcSScanfTest, IntConvNoWriteTests) {
   EXPECT_EQ(result, 0);
 
   ret_val = LIBC_NAMESPACE::sscanf("0x1", "%*2i", &result);
-  EXPECT_EQ(ret_val, 1);
+  EXPECT_EQ(ret_val, 0);
   EXPECT_EQ(result, 0);
 
   ret_val = LIBC_NAMESPACE::sscanf("a", "%*i", &result);
@@ -679,13 +691,17 @@ TEST(LlvmLibcSScanfTest, CombinedConv) {
   EXPECT_EQ(result, 123);
   ASSERT_STREQ(buffer, "abc");
 
+  result = -1;
+
+  // 0x is a valid prefix, but not a valid number. This should be a matching
+  // failure and should not modify the values.
   ret_val = LIBC_NAMESPACE::sscanf("0xZZZ", "%i%s", &result, buffer);
-  EXPECT_EQ(ret_val, 2);
-  EXPECT_EQ(result, 0);
-  ASSERT_STREQ(buffer, "ZZZ");
+  EXPECT_EQ(ret_val, 0);
+  EXPECT_EQ(result, -1);
+  ASSERT_STREQ(buffer, "abc");
 
   ret_val = LIBC_NAMESPACE::sscanf("0xZZZ", "%X%s", &result, buffer);
-  EXPECT_EQ(ret_val, 2);
-  EXPECT_EQ(result, 0);
-  ASSERT_STREQ(buffer, "ZZZ");
+  EXPECT_EQ(ret_val, 0);
+  EXPECT_EQ(result, -1);
+  ASSERT_STREQ(buffer, "abc");
 }

``````````

</details>


https://github.com/llvm/llvm-project/pull/112440


More information about the libc-commits mailing list