[libc-commits] [libc] afa764c - [libc] add scanf current position conversion
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Wed Jan 25 15:29:30 PST 2023
Author: Michael Jones
Date: 2023-01-25T15:29:22-08:00
New Revision: afa764c9a6d68ab6de94a5d007e0b468e65ae946
URL: https://github.com/llvm/llvm-project/commit/afa764c9a6d68ab6de94a5d007e0b468e65ae946
DIFF: https://github.com/llvm/llvm-project/commit/afa764c9a6d68ab6de94a5d007e0b468e65ae946.diff
LOG: [libc] add scanf current position conversion
To add the current position (%n) conversion, some reorganization needed
to be done. The "write a number to this pointer using the length
modifier" utilities and a couple other shared parsing functions have
been moved into converter_utils.h. This made implementing
current_pos_converter very simple.
Reviewed By: lntue
Differential Revision: https://reviews.llvm.org/D142495
Added:
libc/src/stdio/scanf_core/converter_utils.h
libc/src/stdio/scanf_core/current_pos_converter.h
Modified:
libc/src/stdio/scanf_core/CMakeLists.txt
libc/src/stdio/scanf_core/converter.cpp
libc/src/stdio/scanf_core/float_converter.cpp
libc/src/stdio/scanf_core/int_converter.cpp
libc/src/stdio/scanf_core/scanf_main.cpp
libc/test/src/stdio/sscanf_test.cpp
Removed:
################################################################################
diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt
index 54fa40d5363b4..d0eaa1a44012e 100644
--- a/libc/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/src/stdio/scanf_core/CMakeLists.txt
@@ -82,19 +82,21 @@ add_object_library(
float_converter.cpp
HDRS
converter.h
+ converter_utils.h
int_converter.h
string_converter.h
float_converter.h
+ current_pos_converter.h
DEPENDS
.reader
.core_structs
+ libc.src.__support.common
libc.src.__support.ctype_utils
libc.src.__support.CPP.bitset
libc.src.__support.CPP.string_view
libc.src.__support.CPP.limits
libc.src.__support.char_vector
- libc.include.errno
- libc.src.errno.errno
+ libc.src.__support.str_to_float
)
add_object_library(
diff --git a/libc/src/stdio/scanf_core/converter.cpp b/libc/src/stdio/scanf_core/converter.cpp
index c5a2932fb18f8..053e215b4d983 100644
--- a/libc/src/stdio/scanf_core/converter.cpp
+++ b/libc/src/stdio/scanf_core/converter.cpp
@@ -15,6 +15,7 @@
#ifndef LLVM_LIBC_SCANF_DISABLE_FLOAT
#include "src/stdio/scanf_core/float_converter.h"
#endif // LLVM_LIBC_SCANF_DISABLE_FLOAT
+#include "src/stdio/scanf_core/current_pos_converter.h"
#include "src/stdio/scanf_core/int_converter.h"
#include "src/stdio/scanf_core/string_converter.h"
@@ -60,8 +61,8 @@ int convert(Reader *reader, const FormatSection &to_conv) {
return ret_val;
return convert_float(reader, to_conv);
#endif // LLVM_LIBC_SCANF_DISABLE_FLOAT
- // case 'n':
- // return convert_write_int(reader, to_conv);
+ case 'n':
+ return convert_current_pos(reader, to_conv);
// case 'p':
// ret_val = raw_match(reader, " ");
// if (ret_val != READ_OK)
diff --git a/libc/src/stdio/scanf_core/converter_utils.h b/libc/src/stdio/scanf_core/converter_utils.h
new file mode 100644
index 0000000000000..07ac9c7407ed3
--- /dev/null
+++ b/libc/src/stdio/scanf_core/converter_utils.h
@@ -0,0 +1,107 @@
+//===-- Format specifier converter for scanf -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_UTILS_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_UTILS_H
+
+#include "src/__support/common.h"
+#include "src/__support/ctype_utils.h"
+#include "src/__support/str_to_float.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+LIBC_INLINE constexpr char to_lower(char a) { return a | 32; }
+
+LIBC_INLINE constexpr int b36_char_to_int(char input) {
+ if (internal::isdigit(input))
+ return input - '0';
+ if (internal::isalpha(input))
+ return to_lower(input) + 10 - 'a';
+ return 0;
+}
+
+LIBC_INLINE void write_int_with_length(uintmax_t output_val,
+ const FormatSection &to_conv) {
+ if ((to_conv.flags & NO_WRITE) != 0) {
+ return;
+ }
+ LengthModifier lm = to_conv.length_modifier;
+ void *output_ptr = to_conv.output_ptr;
+ switch (lm) {
+ case (LengthModifier::hh):
+ *reinterpret_cast<unsigned char *>(output_ptr) =
+ static_cast<unsigned char>(output_val);
+ break;
+ case (LengthModifier::h):
+ *reinterpret_cast<unsigned short *>(output_ptr) =
+ static_cast<unsigned short>(output_val);
+ break;
+ case (LengthModifier::NONE):
+ *reinterpret_cast<unsigned int *>(output_ptr) =
+ static_cast<unsigned int>(output_val);
+ break;
+ case (LengthModifier::l):
+ *reinterpret_cast<unsigned long *>(output_ptr) =
+ static_cast<unsigned long>(output_val);
+ break;
+ case (LengthModifier::ll):
+ case (LengthModifier::L):
+ *reinterpret_cast<unsigned long long *>(output_ptr) =
+ static_cast<unsigned long long>(output_val);
+ break;
+ case (LengthModifier::j):
+ *reinterpret_cast<uintmax_t *>(output_ptr) =
+ static_cast<uintmax_t>(output_val);
+ break;
+ case (LengthModifier::z):
+ *reinterpret_cast<size_t *>(output_ptr) = static_cast<size_t>(output_val);
+ break;
+ case (LengthModifier::t):
+ *reinterpret_cast<ptrdiff_t *>(output_ptr) =
+ static_cast<ptrdiff_t>(output_val);
+ break;
+ }
+}
+
+LIBC_INLINE void write_float_with_length(char *str,
+ const FormatSection &to_conv) {
+ if ((to_conv.flags & NO_WRITE) != 0) {
+ return;
+ }
+
+ void *output_ptr = to_conv.output_ptr;
+
+ LengthModifier lm = to_conv.length_modifier;
+ switch (lm) {
+ case (LengthModifier::l): {
+ auto value = internal::strtofloatingpoint<double>(str, nullptr);
+ *reinterpret_cast<double *>(output_ptr) = value;
+ break;
+ }
+ case (LengthModifier::L): {
+ auto value = internal::strtofloatingpoint<long double>(str, nullptr);
+ *reinterpret_cast<long double *>(output_ptr) = value;
+ break;
+ }
+ default: {
+ auto value = internal::strtofloatingpoint<float>(str, nullptr);
+ *reinterpret_cast<float *>(output_ptr) = value;
+ break;
+ }
+ }
+}
+
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_UTILS_H
diff --git a/libc/src/stdio/scanf_core/current_pos_converter.h b/libc/src/stdio/scanf_core/current_pos_converter.h
new file mode 100644
index 0000000000000..be4b6553b89dc
--- /dev/null
+++ b/libc/src/stdio/scanf_core/current_pos_converter.h
@@ -0,0 +1,31 @@
+//===-- Current position specifier converter for scanf ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_CURRENT_POS_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_CURRENT_POS_CONVERTER_H
+
+#include "src/__support/common.h"
+#include "src/stdio/scanf_core/converter_utils.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+LIBC_INLINE int convert_current_pos(Reader *reader,
+ const FormatSection &to_conv) {
+ write_int_with_length(reader->chars_read(), to_conv);
+ return READ_OK;
+}
+
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_CURRENT_POS_CONVERTER_H
diff --git a/libc/src/stdio/scanf_core/float_converter.cpp b/libc/src/stdio/scanf_core/float_converter.cpp
index 1a0ce42863f35..5ffb9b09fcfc2 100644
--- a/libc/src/stdio/scanf_core/float_converter.cpp
+++ b/libc/src/stdio/scanf_core/float_converter.cpp
@@ -11,7 +11,7 @@
#include "src/__support/CPP/limits.h"
#include "src/__support/char_vector.h"
#include "src/__support/ctype_utils.h"
-#include "src/__support/str_to_float.h"
+#include "src/stdio/scanf_core/converter_utils.h"
#include "src/stdio/scanf_core/core_structs.h"
#include "src/stdio/scanf_core/reader.h"
@@ -20,35 +20,6 @@
namespace __llvm_libc {
namespace scanf_core {
-constexpr char inline to_lower(char a) { return a | 32; }
-
-void write_with_length(char *str, const FormatSection &to_conv) {
- if ((to_conv.flags & NO_WRITE) != 0) {
- return;
- }
-
- void *output_ptr = to_conv.output_ptr;
-
- LengthModifier lm = to_conv.length_modifier;
- switch (lm) {
- case (LengthModifier::l): {
- auto value = internal::strtofloatingpoint<double>(str, nullptr);
- *reinterpret_cast<double *>(output_ptr) = value;
- break;
- }
- case (LengthModifier::L): {
- auto value = internal::strtofloatingpoint<long double>(str, nullptr);
- *reinterpret_cast<long double *>(output_ptr) = value;
- break;
- }
- default: {
- auto value = internal::strtofloatingpoint<float>(str, nullptr);
- *reinterpret_cast<float *>(output_ptr) = value;
- break;
- }
- }
-}
-
// All of the floating point conversions are the same for scanf, every name will
// accept every style.
int convert_float(Reader *reader, const FormatSection &to_conv) {
@@ -96,7 +67,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
}
if (inf_index == 3 || inf_index == sizeof(inf_string) - 1) {
- write_with_length(out_str.c_str(), to_conv);
+ write_float_with_length(out_str.c_str(), to_conv);
return READ_OK;
} else {
return MATCHING_FAILURE;
@@ -119,7 +90,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
}
if (nan_index == sizeof(nan_string) - 1) {
- write_with_length(out_str.c_str(), to_conv);
+ write_float_with_length(out_str.c_str(), to_conv);
return READ_OK;
} else {
return MATCHING_FAILURE;
@@ -138,7 +109,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
}
// If we've hit the end, then this is "0", which is valid.
if (out_str.length() == max_width) {
- write_with_length(out_str.c_str(), to_conv);
+ write_float_with_length(out_str.c_str(), to_conv);
return READ_OK;
} else {
cur_char = reader->getc();
@@ -154,7 +125,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
// If we've hit the end here, we have "0x" which is a valid prefix to a
// floating point number, and will be evaluated to 0.
if (out_str.length() == max_width) {
- write_with_length(out_str.c_str(), to_conv);
+ write_float_with_length(out_str.c_str(), to_conv);
return READ_OK;
} else {
cur_char = reader->getc();
@@ -246,7 +217,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
if (!is_number) {
return MATCHING_FAILURE;
}
- write_with_length(out_str.c_str(), to_conv);
+ write_float_with_length(out_str.c_str(), to_conv);
return READ_OK;
}
diff --git a/libc/src/stdio/scanf_core/int_converter.cpp b/libc/src/stdio/scanf_core/int_converter.cpp
index be88a01f942d7..ce23f5d3fed8d 100644
--- a/libc/src/stdio/scanf_core/int_converter.cpp
+++ b/libc/src/stdio/scanf_core/int_converter.cpp
@@ -10,6 +10,7 @@
#include "src/__support/CPP/limits.h"
#include "src/__support/ctype_utils.h"
+#include "src/stdio/scanf_core/converter_utils.h"
#include "src/stdio/scanf_core/core_structs.h"
#include "src/stdio/scanf_core/reader.h"
@@ -18,58 +19,6 @@
namespace __llvm_libc {
namespace scanf_core {
-constexpr char inline to_lower(char a) { return a | 32; }
-
-constexpr inline int b36_char_to_int(char input) {
- if (internal::isdigit(input))
- return input - '0';
- if (internal::isalpha(input))
- return to_lower(input) + 10 - 'a';
- return 0;
-}
-
-void write_with_length(uintmax_t output_val, const FormatSection &to_conv) {
- if ((to_conv.flags & NO_WRITE) != 0) {
- return;
- }
- LengthModifier lm = to_conv.length_modifier;
- void *output_ptr = to_conv.output_ptr;
- switch (lm) {
- case (LengthModifier::hh):
- *reinterpret_cast<unsigned char *>(output_ptr) =
- static_cast<unsigned char>(output_val);
- break;
- case (LengthModifier::h):
- *reinterpret_cast<unsigned short *>(output_ptr) =
- static_cast<unsigned short>(output_val);
- break;
- case (LengthModifier::NONE):
- *reinterpret_cast<unsigned int *>(output_ptr) =
- static_cast<unsigned int>(output_val);
- break;
- case (LengthModifier::l):
- *reinterpret_cast<unsigned long *>(output_ptr) =
- static_cast<unsigned long>(output_val);
- break;
- case (LengthModifier::ll):
- case (LengthModifier::L):
- *reinterpret_cast<unsigned long long *>(output_ptr) =
- static_cast<unsigned long long>(output_val);
- break;
- case (LengthModifier::j):
- *reinterpret_cast<uintmax_t *>(output_ptr) =
- static_cast<uintmax_t>(output_val);
- break;
- case (LengthModifier::z):
- *reinterpret_cast<size_t *>(output_ptr) = static_cast<size_t>(output_val);
- break;
- case (LengthModifier::t):
- *reinterpret_cast<ptrdiff_t *>(output_ptr) =
- static_cast<ptrdiff_t>(output_val);
- break;
- }
-}
-
// This code is very similar to the code in __support/str_to_integer.h but is
// not quite the same. Here is the list of differences and why they exist:
// 1) This takes a reader and a format section instead of a char* and the base.
@@ -150,7 +99,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
} else {
// If the max width has been hit already, then the return value must be 0
// since no actual digits of the number have been parsed yet.
- write_with_length(0, to_conv);
+ write_int_with_length(0, to_conv);
return MATCHING_FAILURE;
}
}
@@ -168,7 +117,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
--max_width;
cur_char = reader->getc();
} else {
- write_with_length(0, to_conv);
+ write_int_with_length(0, to_conv);
return READ_OK;
}
@@ -179,7 +128,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
--max_width;
cur_char = reader->getc();
} else {
- write_with_length(0, to_conv);
+ write_int_with_length(0, to_conv);
return READ_OK;
}
@@ -196,7 +145,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
// If the first character isn't a valid digit, then there are no valid
// digits at all. The number is 0.
reader->ungetc(cur_char);
- write_with_length(0, to_conv);
+ write_int_with_length(0, to_conv);
return MATCHING_FAILURE;
}
}
@@ -249,12 +198,12 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
reader->ungetc(cur_char);
if (has_overflow) {
- write_with_length(MAX, to_conv);
+ write_int_with_length(MAX, to_conv);
} else {
if (is_negative)
result = -result;
- write_with_length(result, to_conv);
+ write_int_with_length(result, to_conv);
}
if (!is_number)
diff --git a/libc/src/stdio/scanf_core/scanf_main.cpp b/libc/src/stdio/scanf_core/scanf_main.cpp
index ed509eca4c66f..5a79d2e624ab0 100644
--- a/libc/src/stdio/scanf_core/scanf_main.cpp
+++ b/libc/src/stdio/scanf_core/scanf_main.cpp
@@ -29,7 +29,10 @@ int scanf_main(Reader *reader, const char *__restrict str,
cur_section = parser.get_next_section()) {
if (cur_section.has_conv) {
ret_val = convert(reader, cur_section);
- conversions += ret_val == READ_OK ? 1 : 0;
+ // The %n (current position) conversion doesn't increment the number of
+ // assignments.
+ if (cur_section.conv_name != 'n')
+ conversions += ret_val == READ_OK ? 1 : 0;
} else {
ret_val = raw_match(reader, cur_section.raw_string);
}
diff --git a/libc/test/src/stdio/sscanf_test.cpp b/libc/test/src/stdio/sscanf_test.cpp
index fc67593f57f0b..55671279e1b4d 100644
--- a/libc/test/src/stdio/sscanf_test.cpp
+++ b/libc/test/src/stdio/sscanf_test.cpp
@@ -59,6 +59,20 @@ TEST(LlvmLibcSScanfTest, IntConvSimple) {
EXPECT_EQ(ret_val, 1);
EXPECT_EQ(result, 345);
+ // 288 characters
+ ret_val = __llvm_libc::sscanf("10000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000",
+ "%d", &result);
+ EXPECT_EQ(ret_val, 1);
+ EXPECT_EQ(result, int(__llvm_libc::cpp::numeric_limits<intmax_t>::max()));
+
ret_val = __llvm_libc::sscanf("Not an integer", "%d", &result);
EXPECT_EQ(ret_val, 0);
}
@@ -445,11 +459,6 @@ TEST(LlvmLibcSScanfTest, FloatConvComplexParsing) {
EXPECT_FP_EQ(result, 1.2);
}
-/*
-TODO:
- Max width tests
-*/
-
TEST(LlvmLibcSScanfTest, FloatConvMaxWidth) {
int ret_val;
float result = 0;
@@ -572,6 +581,54 @@ TEST(LlvmLibcSScanfTest, FloatConvNoWrite) {
EXPECT_EQ(ret_val, 0);
}
+TEST(LlvmLibcSScanfTest, CurPosCombined) {
+ int ret_val;
+ int result = -1;
+ char c_result = 0;
+
+ ret_val = __llvm_libc::sscanf("some text", "%n", &result);
+ // %n doesn't count as a conversion for the return value.
+ EXPECT_EQ(ret_val, 0);
+ EXPECT_EQ(result, 0);
+
+ ret_val = __llvm_libc::sscanf("1234567890", "12345%n", &result);
+ EXPECT_EQ(ret_val, 0);
+ EXPECT_EQ(result, 5);
+
+ ret_val = __llvm_libc::sscanf("1234567890", "12345%n", &result);
+ EXPECT_EQ(ret_val, 0);
+ EXPECT_EQ(result, 5);
+
+ // 288 characters
+ ret_val = __llvm_libc::sscanf("10000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000",
+ "%*d%hhn", &c_result);
+ EXPECT_EQ(ret_val, 1);
+ EXPECT_EQ(c_result, char(288)); // Overflow is handled by casting.
+
+ // 320 characters
+ ret_val = __llvm_libc::sscanf("10000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000"
+ "00000000000000000000000000000000",
+ "%*d%n", &result);
+ EXPECT_EQ(ret_val, 1);
+ EXPECT_EQ(result, 320);
+}
+
TEST(LlvmLibcSScanfTest, CombinedConv) {
int ret_val;
int result = 0;
More information about the libc-commits mailing list