[libc-commits] [libc] [libc] Scanf shouldn't match just "0x" for hex int (PR #112440)
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Thu Oct 17 09:44:30 PDT 2024
https://github.com/michaelrj-google updated https://github.com/llvm/llvm-project/pull/112440
>From 1cbfb1e86665b381ab29c0a2bab5dd0c70023df0 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Tue, 15 Oct 2024 13:56:44 -0700
Subject: [PATCH 1/2] [libc] Scanf shouldn't match just "0x" for hex int
Scanf parsing reads the longest possibly valid prefix for a given
conversion. Then, it performs the conversion on that string. In the case
of "0xZ" with a hex conversion (either "%x" or "%i") the longest
possibly valid prefix is "0x", which makes it the "input item" (per the
standard). The sequence "0x" is not a "matching sequence" for a hex
conversion, meaning it results in a matching failure, and parsing ends.
This is because to know that there's no valid digit after "0x" it reads
the 'Z', but it can only put back one character (the 'Z') leaving it
with consuming an invalid sequence.
(inspired by a thread on the libc-coord mailing list, see 7.32.6.2 in
the standard for more details.)
---
libc/src/stdio/scanf_core/int_converter.cpp | 21 +++++++++---
libc/test/src/stdio/sscanf_test.cpp | 36 +++++++++++++++------
2 files changed, 43 insertions(+), 14 deletions(-)
diff --git a/libc/src/stdio/scanf_core/int_converter.cpp b/libc/src/stdio/scanf_core/int_converter.cpp
index 136db2a3773e11..cc3ab9da0bbd4f 100644
--- a/libc/src/stdio/scanf_core/int_converter.cpp
+++ b/libc/src/stdio/scanf_core/int_converter.cpp
@@ -124,13 +124,25 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
if (to_lower(cur_char) == 'x') {
// This is a valid hex prefix.
+
+ is_number = false;
+ // A valid hex prefix is not necessarily a valid number. For the
+ // conversion to be valid it needs to use all of the characters it
+ // consumes. From the standard:
+ // 7.23.6.2 paragraph 9: "An input item is defined as the longest
+ // sequence of input characters which does not exceed any specified
+ // field width and which is, or is a prefix of, a matching input
+ // sequence."
+ // 7.23.6.2 paragraph 10: "If the input item is not a matching sequence,
+ // the execution of the directive fails: this condition is a matching
+ // failure"
base = 16;
if (max_width > 1) {
--max_width;
cur_char = reader->getc();
} else {
- write_int_with_length(0, to_conv);
- return READ_OK;
+ // write_int_with_length(0, to_conv);
+ return MATCHING_FAILURE;
}
} else {
@@ -198,6 +210,9 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
// last one back.
reader->ungetc(cur_char);
+ if (!is_number)
+ return MATCHING_FAILURE;
+
if (has_overflow) {
write_int_with_length(MAX, to_conv);
} else {
@@ -207,8 +222,6 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
write_int_with_length(result, to_conv);
}
- if (!is_number)
- return MATCHING_FAILURE;
return READ_OK;
}
diff --git a/libc/test/src/stdio/sscanf_test.cpp b/libc/test/src/stdio/sscanf_test.cpp
index 33bb0acba3e662..18addb632067c9 100644
--- a/libc/test/src/stdio/sscanf_test.cpp
+++ b/libc/test/src/stdio/sscanf_test.cpp
@@ -177,13 +177,25 @@ TEST(LlvmLibcSScanfTest, IntConvMaxLengthTests) {
EXPECT_EQ(ret_val, 1);
EXPECT_EQ(result, 0);
+ result = -999;
+
+ // 0x is a valid prefix, but not a valid number. This should be a matching
+ // failure and should not modify the values.
ret_val = LIBC_NAMESPACE::sscanf("0x1", "%2i", &result);
- EXPECT_EQ(ret_val, 1);
- EXPECT_EQ(result, 0);
+ EXPECT_EQ(ret_val, 0);
+ EXPECT_EQ(result, -999);
ret_val = LIBC_NAMESPACE::sscanf("-0x1", "%3i", &result);
+ EXPECT_EQ(ret_val, 0);
+ EXPECT_EQ(result, -999);
+
+ ret_val = LIBC_NAMESPACE::sscanf("0x1", "%3i", &result);
EXPECT_EQ(ret_val, 1);
- EXPECT_EQ(result, 0);
+ EXPECT_EQ(result, 1);
+
+ ret_val = LIBC_NAMESPACE::sscanf("-0x1", "%4i", &result);
+ EXPECT_EQ(ret_val, 1);
+ EXPECT_EQ(result, -1);
ret_val = LIBC_NAMESPACE::sscanf("-0x123", "%4i", &result);
EXPECT_EQ(ret_val, 1);
@@ -212,7 +224,7 @@ TEST(LlvmLibcSScanfTest, IntConvNoWriteTests) {
EXPECT_EQ(result, 0);
ret_val = LIBC_NAMESPACE::sscanf("0x1", "%*2i", &result);
- EXPECT_EQ(ret_val, 1);
+ EXPECT_EQ(ret_val, 0);
EXPECT_EQ(result, 0);
ret_val = LIBC_NAMESPACE::sscanf("a", "%*i", &result);
@@ -679,13 +691,17 @@ TEST(LlvmLibcSScanfTest, CombinedConv) {
EXPECT_EQ(result, 123);
ASSERT_STREQ(buffer, "abc");
+ result = -1;
+
+ // 0x is a valid prefix, but not a valid number. This should be a matching
+ // failure and should not modify the values.
ret_val = LIBC_NAMESPACE::sscanf("0xZZZ", "%i%s", &result, buffer);
- EXPECT_EQ(ret_val, 2);
- EXPECT_EQ(result, 0);
- ASSERT_STREQ(buffer, "ZZZ");
+ EXPECT_EQ(ret_val, 0);
+ EXPECT_EQ(result, -1);
+ ASSERT_STREQ(buffer, "abc");
ret_val = LIBC_NAMESPACE::sscanf("0xZZZ", "%X%s", &result, buffer);
- EXPECT_EQ(ret_val, 2);
- EXPECT_EQ(result, 0);
- ASSERT_STREQ(buffer, "ZZZ");
+ EXPECT_EQ(ret_val, 0);
+ EXPECT_EQ(result, -1);
+ ASSERT_STREQ(buffer, "abc");
}
>From d6b2da95ebf9283f0e413e43330aacfd242f0275 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Thu, 17 Oct 2024 09:44:00 -0700
Subject: [PATCH 2/2] delete commented-out code
---
libc/src/stdio/scanf_core/int_converter.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/libc/src/stdio/scanf_core/int_converter.cpp b/libc/src/stdio/scanf_core/int_converter.cpp
index cc3ab9da0bbd4f..ecdac52e84bbde 100644
--- a/libc/src/stdio/scanf_core/int_converter.cpp
+++ b/libc/src/stdio/scanf_core/int_converter.cpp
@@ -141,7 +141,6 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
--max_width;
cur_char = reader->getc();
} else {
- // write_int_with_length(0, to_conv);
return MATCHING_FAILURE;
}
More information about the libc-commits
mailing list