[libc-commits] [libc] [libc] Templatize the scanf Reader interface (PR #131037)
Petr Hosek via libc-commits
libc-commits at lists.llvm.org
Mon Mar 17 16:01:51 PDT 2025
https://github.com/petrhosek updated https://github.com/llvm/llvm-project/pull/131037
From 2610f9ae9ef288f4b6c07d7d981ce38923aa32a8 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek at google.com>
Date: Tue, 18 Feb 2025 14:05:26 -0800
Subject: [PATCH 1/4] [libc] Templatize the scanf Reader interface
This allows specializing the implementation for different targets
without including unnecessary logic, and is similar to #111559, which
did the same for the printf Writer interface.
---
libc/src/stdio/scanf_core/CMakeLists.txt | 12 +-
libc/src/stdio/scanf_core/converter.cpp | 73 ------
libc/src/stdio/scanf_core/converter.h | 83 ++++++-
.../stdio/scanf_core/current_pos_converter.h | 3 +-
libc/src/stdio/scanf_core/float_converter.cpp | 229 -----------------
libc/src/stdio/scanf_core/float_converter.h | 209 +++++++++++++++-
libc/src/stdio/scanf_core/int_converter.cpp | 230 ------------------
libc/src/stdio/scanf_core/int_converter.h | 210 +++++++++++++++-
libc/src/stdio/scanf_core/ptr_converter.cpp | 46 ----
libc/src/stdio/scanf_core/ptr_converter.h | 27 +-
libc/src/stdio/scanf_core/reader.h | 118 +++------
libc/src/stdio/scanf_core/scanf_main.cpp | 46 ----
libc/src/stdio/scanf_core/scanf_main.h | 27 +-
.../src/stdio/scanf_core/string_converter.cpp | 77 ------
libc/src/stdio/scanf_core/string_converter.h | 57 ++++-
libc/src/stdio/scanf_core/vfscanf_internal.h | 40 ++-
libc/src/stdio/sscanf.cpp | 4 +-
libc/src/stdio/vsscanf.cpp | 6 +-
.../src/stdio/scanf_core/converter_test.cpp | 35 +--
.../test/src/stdio/scanf_core/reader_test.cpp | 15 +-
20 files changed, 714 insertions(+), 833 deletions(-)
delete mode 100644 libc/src/stdio/scanf_core/float_converter.cpp
delete mode 100644 libc/src/stdio/scanf_core/int_converter.cpp
delete mode 100644 libc/src/stdio/scanf_core/ptr_converter.cpp
delete mode 100644 libc/src/stdio/scanf_core/scanf_main.cpp
delete mode 100644 libc/src/stdio/scanf_core/string_converter.cpp
diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt
index 014413ccaa8da..84d12e6138002 100644
--- a/libc/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/src/stdio/scanf_core/CMakeLists.txt
@@ -61,10 +61,8 @@ if(NOT(TARGET libc.src.__support.File.file) AND LLVM_LIBC_FULL_BUILD AND
return()
endif()
-add_object_library(
+add_header_library(
scanf_main
- SRCS
- scanf_main.cpp
HDRS
scanf_main.h
DEPENDS
@@ -87,14 +85,8 @@ add_header_library(
${use_system_file}
)
-add_object_library(
+add_header_library(
converter
- SRCS
- converter.cpp
- string_converter.cpp
- int_converter.cpp
- float_converter.cpp
- ptr_converter.cpp
HDRS
converter.h
converter_utils.h
diff --git a/libc/src/stdio/scanf_core/converter.cpp b/libc/src/stdio/scanf_core/converter.cpp
index b1ee8cd1e74bb..fb234ce7864cf 100644
--- a/libc/src/stdio/scanf_core/converter.cpp
+++ b/libc/src/stdio/scanf_core/converter.cpp
@@ -26,78 +26,5 @@
namespace LIBC_NAMESPACE_DECL {
namespace scanf_core {
-int convert(Reader *reader, const FormatSection &to_conv) {
- int ret_val = 0;
- switch (to_conv.conv_name) {
- case '%':
- return raw_match(reader, "%");
- case 's':
- ret_val = raw_match(reader, " ");
- if (ret_val != READ_OK)
- return ret_val;
- return convert_string(reader, to_conv);
- case 'c':
- case '[':
- return convert_string(reader, to_conv);
- case 'd':
- case 'i':
- case 'u':
- case 'o':
- case 'x':
- case 'X':
- ret_val = raw_match(reader, " ");
- if (ret_val != READ_OK)
- return ret_val;
- return convert_int(reader, to_conv);
-#ifndef LIBC_COPT_SCANF_DISABLE_FLOAT
- case 'f':
- case 'F':
- case 'e':
- case 'E':
- case 'a':
- case 'A':
- case 'g':
- case 'G':
- ret_val = raw_match(reader, " ");
- if (ret_val != READ_OK)
- return ret_val;
- return convert_float(reader, to_conv);
-#endif // LIBC_COPT_SCANF_DISABLE_FLOAT
- case 'n':
- return convert_current_pos(reader, to_conv);
- case 'p':
- ret_val = raw_match(reader, " ");
- if (ret_val != READ_OK)
- return ret_val;
- return convert_pointer(reader, to_conv);
- default:
- return raw_match(reader, to_conv.raw_string);
- }
- return -1;
-}
-
-// raw_string is assumed to have a positive size.
-int raw_match(Reader *reader, cpp::string_view raw_string) {
- char cur_char = reader->getc();
- int ret_val = READ_OK;
- for (size_t i = 0; i < raw_string.size(); ++i) {
- // Any space character matches any number of space characters.
- if (internal::isspace(raw_string[i])) {
- while (internal::isspace(cur_char)) {
- cur_char = reader->getc();
- }
- } else {
- if (raw_string[i] == cur_char) {
- cur_char = reader->getc();
- } else {
- ret_val = MATCHING_FAILURE;
- break;
- }
- }
- }
- reader->ungetc(cur_char);
- return ret_val;
-}
-
} // namespace scanf_core
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/converter.h b/libc/src/stdio/scanf_core/converter.h
index 3f514eeb75bdf..3df43e9de1e25 100644
--- a/libc/src/stdio/scanf_core/converter.h
+++ b/libc/src/stdio/scanf_core/converter.h
@@ -10,10 +10,19 @@
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H
#include "src/__support/CPP/string_view.h"
+#include "src/__support/ctype_utils.h"
#include "src/__support/macros/config.h"
#include "src/stdio/scanf_core/core_structs.h"
#include "src/stdio/scanf_core/reader.h"
+#ifndef LIBC_COPT_SCANF_DISABLE_FLOAT
+#include "src/stdio/scanf_core/float_converter.h"
+#endif // LIBC_COPT_SCANF_DISABLE_FLOAT
+#include "src/stdio/scanf_core/current_pos_converter.h"
+#include "src/stdio/scanf_core/int_converter.h"
+#include "src/stdio/scanf_core/ptr_converter.h"
+#include "src/stdio/scanf_core/string_converter.h"
+
#include <stddef.h>
namespace LIBC_NAMESPACE_DECL {
@@ -22,11 +31,81 @@ namespace scanf_core {
// convert will call a conversion function to convert the FormatSection into
// its string representation, and then that will write the result to the
// reader.
-int convert(Reader *reader, const FormatSection &to_conv);
+template <typename T>
+int convert(Reader<T> *reader, const FormatSection &to_conv) {
+ int ret_val = 0;
+ switch (to_conv.conv_name) {
+ case '%':
+ return raw_match(reader, "%");
+ case 's':
+ ret_val = raw_match(reader, " ");
+ if (ret_val != READ_OK)
+ return ret_val;
+ return convert_string(reader, to_conv);
+ case 'c':
+ case '[':
+ return convert_string(reader, to_conv);
+ case 'd':
+ case 'i':
+ case 'u':
+ case 'o':
+ case 'x':
+ case 'X':
+ ret_val = raw_match(reader, " ");
+ if (ret_val != READ_OK)
+ return ret_val;
+ return convert_int(reader, to_conv);
+#ifndef LIBC_COPT_SCANF_DISABLE_FLOAT
+ case 'f':
+ case 'F':
+ case 'e':
+ case 'E':
+ case 'a':
+ case 'A':
+ case 'g':
+ case 'G':
+ ret_val = raw_match(reader, " ");
+ if (ret_val != READ_OK)
+ return ret_val;
+ return convert_float(reader, to_conv);
+#endif // LIBC_COPT_SCANF_DISABLE_FLOAT
+ case 'n':
+ return convert_current_pos(reader, to_conv);
+ case 'p':
+ ret_val = raw_match(reader, " ");
+ if (ret_val != READ_OK)
+ return ret_val;
+ return convert_pointer(reader, to_conv);
+ default:
+ return raw_match(reader, to_conv.raw_string);
+ }
+ return -1;
+}
// raw_match takes a raw string and matches it to the characters obtained from
// the reader.
-int raw_match(Reader *reader, cpp::string_view raw_string);
+template <typename T>
+int raw_match(Reader<T> *reader, cpp::string_view raw_string) {
+ char cur_char = reader->getc();
+ int ret_val = READ_OK;
+ for (size_t i = 0; i < raw_string.size(); ++i) {
+ // Any space character matches any number of space characters.
+ if (internal::isspace(raw_string[i])) {
+ while (internal::isspace(cur_char)) {
+ cur_char = reader->getc();
+ }
+ } else {
+ if (raw_string[i] == cur_char) {
+ cur_char = reader->getc();
+ } else {
+ ret_val = MATCHING_FAILURE;
+ break;
+ }
+ }
+ }
+ reader->ungetc(cur_char);
+ return ret_val;
+}
} // namespace scanf_core
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/current_pos_converter.h b/libc/src/stdio/scanf_core/current_pos_converter.h
index 8af1cc0ca0c27..8708490c82d3e 100644
--- a/libc/src/stdio/scanf_core/current_pos_converter.h
+++ b/libc/src/stdio/scanf_core/current_pos_converter.h
@@ -19,7 +19,8 @@
namespace LIBC_NAMESPACE_DECL {
namespace scanf_core {
-LIBC_INLINE int convert_current_pos(Reader *reader,
+template <typename T>
+LIBC_INLINE int convert_current_pos(Reader<T> *reader,
const FormatSection &to_conv) {
write_int_with_length(reader->chars_read(), to_conv);
return READ_OK;
diff --git a/libc/src/stdio/scanf_core/float_converter.cpp b/libc/src/stdio/scanf_core/float_converter.cpp
deleted file mode 100644
index 9c714d0727214..0000000000000
--- a/libc/src/stdio/scanf_core/float_converter.cpp
+++ /dev/null
@@ -1,229 +0,0 @@
-//===-- Int type specifier converters for scanf -----------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/stdio/scanf_core/float_converter.h"
-
-#include "src/__support/CPP/limits.h"
-#include "src/__support/char_vector.h"
-#include "src/__support/ctype_utils.h"
-#include "src/__support/macros/config.h"
-#include "src/stdio/scanf_core/converter_utils.h"
-#include "src/stdio/scanf_core/core_structs.h"
-#include "src/stdio/scanf_core/reader.h"
-
-#include <stddef.h>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace scanf_core {
-
-// All of the floating point conversions are the same for scanf, every name will
-// accept every style.
-int convert_float(Reader *reader, const FormatSection &to_conv) {
- // %a/A/e/E/f/F/g/G "Matches an optionally signed floating-point number,
- // infinity, or NaN, whose format is the same as expected for the subject
- // sequence of the strtod function. The corresponding argument shall be a
- // pointer to floating."
-
- CharVector out_str = CharVector();
- bool is_number = false;
-
- size_t max_width = cpp::numeric_limits<size_t>::max();
- if (to_conv.max_width > 0) {
- max_width = to_conv.max_width;
- }
-
- char cur_char = reader->getc();
- // Handle the sign.
- if (cur_char == '+' || cur_char == '-') {
- if (!out_str.append(cur_char)) {
- return ALLOCATION_FAILURE;
- }
- if (out_str.length() == max_width) {
- return MATCHING_FAILURE;
- } else {
- cur_char = reader->getc();
- }
- }
-
- static constexpr char DECIMAL_POINT = '.';
- static const char inf_string[] = "infinity";
-
- // Handle inf
-
- if (internal::tolower(cur_char) == inf_string[0]) {
- size_t inf_index = 0;
-
- for (;
- inf_index < (sizeof(inf_string) - 1) && out_str.length() < max_width &&
- internal::tolower(cur_char) == inf_string[inf_index];
- ++inf_index) {
- if (!out_str.append(cur_char)) {
- return ALLOCATION_FAILURE;
- }
- cur_char = reader->getc();
- }
-
- if (inf_index == 3 || inf_index == sizeof(inf_string) - 1) {
- write_float_with_length(out_str.c_str(), to_conv);
- return READ_OK;
- } else {
- return MATCHING_FAILURE;
- }
- }
-
- static const char nan_string[] = "nan";
-
- // Handle nan
- if (internal::tolower(cur_char) == nan_string[0]) {
- size_t nan_index = 0;
-
- for (;
- nan_index < (sizeof(nan_string) - 1) && out_str.length() < max_width &&
- internal::tolower(cur_char) == nan_string[nan_index];
- ++nan_index) {
- if (!out_str.append(cur_char)) {
- return ALLOCATION_FAILURE;
- }
- cur_char = reader->getc();
- }
-
- if (nan_index == sizeof(nan_string) - 1) {
- write_float_with_length(out_str.c_str(), to_conv);
- return READ_OK;
- } else {
- return MATCHING_FAILURE;
- }
- }
-
- // Assume base of 10 by default but check if it is actually base 16.
- int base = 10;
-
- // If the string starts with 0 it might be in hex.
- if (cur_char == '0') {
- is_number = true;
- // Read the next character to check.
- if (!out_str.append(cur_char)) {
- return ALLOCATION_FAILURE;
- }
- // If we've hit the end, then this is "0", which is valid.
- if (out_str.length() == max_width) {
- write_float_with_length(out_str.c_str(), to_conv);
- return READ_OK;
- } else {
- cur_char = reader->getc();
- }
-
- // If that next character is an 'x' then this is a hexadecimal number.
- if (internal::tolower(cur_char) == 'x') {
- base = 16;
-
- if (!out_str.append(cur_char)) {
- return ALLOCATION_FAILURE;
- }
- // If we've hit the end here, we have "0x" which is a valid prefix to a
- // floating point number, and will be evaluated to 0.
- if (out_str.length() == max_width) {
- write_float_with_length(out_str.c_str(), to_conv);
- return READ_OK;
- } else {
- cur_char = reader->getc();
- }
- }
- }
-
- const char exponent_mark = ((base == 10) ? 'e' : 'p');
- bool after_decimal = false;
-
- // The format for the remaining characters at this point is DD.DDe+/-DD for
- // base 10 and XX.XXp+/-DD for base 16
-
- // This handles the digits before and after the decimal point, but not the
- // exponent.
- while (out_str.length() < max_width) {
- if (internal::isalnum(cur_char) &&
- internal::b36_char_to_int(cur_char) < base) {
- is_number = true;
- if (!out_str.append(cur_char)) {
- return ALLOCATION_FAILURE;
- }
- cur_char = reader->getc();
- } else if (cur_char == DECIMAL_POINT && !after_decimal) {
- after_decimal = true;
- if (!out_str.append(cur_char)) {
- return ALLOCATION_FAILURE;
- }
- cur_char = reader->getc();
- } else {
- break;
- }
- }
-
- // Handle the exponent, which has an exponent mark, an optional sign, and
- // decimal digits.
- if (internal::tolower(cur_char) == exponent_mark) {
- if (!out_str.append(cur_char)) {
- return ALLOCATION_FAILURE;
- }
- if (out_str.length() == max_width) {
- // This is laid out in the standard as being a matching error (100e is not
- // a valid float) but may conflict with existing implementations.
- return MATCHING_FAILURE;
- } else {
- cur_char = reader->getc();
- }
-
- if (cur_char == '+' || cur_char == '-') {
- if (!out_str.append(cur_char)) {
- return ALLOCATION_FAILURE;
- }
- if (out_str.length() == max_width) {
- return MATCHING_FAILURE;
- } else {
- cur_char = reader->getc();
- }
- }
-
- // It is specified by the standard that "100er" is a matching failure since
- // the longest prefix of a possibly valid floating-point number (which is
- // "100e") is not a valid floating-point number. If there is an exponent
- // mark then there must be a digit after it else the number is not valid.
- // Some implementations will roll back two characters (to just "100") and
- // accept that since the prefix is not valid, and some will interpret an
- // exponent mark followed by no digits as an additional exponent of 0
- // (accepting "100e" and returning 100.0). Both of these behaviors are wrong
- // by the standard, but they may be used in real code, see Hyrum's law. This
- // code follows the standard, but may be incompatible due to code expecting
- // these bugs.
- if (!internal::isdigit(cur_char)) {
- return MATCHING_FAILURE;
- }
-
- while (internal::isdigit(cur_char) && out_str.length() < max_width) {
- if (!out_str.append(cur_char)) {
- return ALLOCATION_FAILURE;
- }
- cur_char = reader->getc();
- }
- }
-
- // We always read one more character than will be used, so we have to put the
- // last one back.
- reader->ungetc(cur_char);
-
- // If we haven't actually found any digits, this is a matching failure (this
- // catches cases like "+.")
- if (!is_number) {
- return MATCHING_FAILURE;
- }
- write_float_with_length(out_str.c_str(), to_conv);
-
- return READ_OK;
-}
-
-} // namespace scanf_core
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/float_converter.h b/libc/src/stdio/scanf_core/float_converter.h
index bd44847830fd1..6bbba379e13b6 100644
--- a/libc/src/stdio/scanf_core/float_converter.h
+++ b/libc/src/stdio/scanf_core/float_converter.h
@@ -9,7 +9,11 @@
#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_FLOAT_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_FLOAT_CONVERTER_H
+#include "src/__support/CPP/limits.h"
+#include "src/__support/char_vector.h"
+#include "src/__support/ctype_utils.h"
#include "src/__support/macros/config.h"
+#include "src/stdio/scanf_core/converter_utils.h"
#include "src/stdio/scanf_core/core_structs.h"
#include "src/stdio/scanf_core/reader.h"
@@ -18,7 +22,210 @@
namespace LIBC_NAMESPACE_DECL {
namespace scanf_core {
-int convert_float(Reader *reader, const FormatSection &to_conv);
+// All of the floating point conversions are the same for scanf, every name will
+// accept every style.
+template <typename T>
+int convert_float(Reader<T> *reader, const FormatSection &to_conv) {
+ // %a/A/e/E/f/F/g/G "Matches an optionally signed floating-point number,
+ // infinity, or NaN, whose format is the same as expected for the subject
+ // sequence of the strtod function. The corresponding argument shall be a
+ // pointer to floating."
+
+ CharVector out_str = CharVector();
+ bool is_number = false;
+
+ size_t max_width = cpp::numeric_limits<size_t>::max();
+ if (to_conv.max_width > 0) {
+ max_width = to_conv.max_width;
+ }
+
+ char cur_char = reader->getc();
+ // Handle the sign.
+ if (cur_char == '+' || cur_char == '-') {
+ if (!out_str.append(cur_char)) {
+ return ALLOCATION_FAILURE;
+ }
+ if (out_str.length() == max_width) {
+ return MATCHING_FAILURE;
+ } else {
+ cur_char = reader->getc();
+ }
+ }
+
+ static constexpr char DECIMAL_POINT = '.';
+ static const char inf_string[] = "infinity";
+
+ // Handle inf
+
+ if (internal::tolower(cur_char) == inf_string[0]) {
+ size_t inf_index = 0;
+
+ for (;
+ inf_index < (sizeof(inf_string) - 1) && out_str.length() < max_width &&
+ internal::tolower(cur_char) == inf_string[inf_index];
+ ++inf_index) {
+ if (!out_str.append(cur_char)) {
+ return ALLOCATION_FAILURE;
+ }
+ cur_char = reader->getc();
+ }
+
+ if (inf_index == 3 || inf_index == sizeof(inf_string) - 1) {
+ write_float_with_length(out_str.c_str(), to_conv);
+ return READ_OK;
+ } else {
+ return MATCHING_FAILURE;
+ }
+ }
+
+ static const char nan_string[] = "nan";
+
+ // Handle nan
+ if (internal::tolower(cur_char) == nan_string[0]) {
+ size_t nan_index = 0;
+
+ for (;
+ nan_index < (sizeof(nan_string) - 1) && out_str.length() < max_width &&
+ internal::tolower(cur_char) == nan_string[nan_index];
+ ++nan_index) {
+ if (!out_str.append(cur_char)) {
+ return ALLOCATION_FAILURE;
+ }
+ cur_char = reader->getc();
+ }
+
+ if (nan_index == sizeof(nan_string) - 1) {
+ write_float_with_length(out_str.c_str(), to_conv);
+ return READ_OK;
+ } else {
+ return MATCHING_FAILURE;
+ }
+ }
+
+ // Assume base of 10 by default but check if it is actually base 16.
+ int base = 10;
+
+ // If the string starts with 0 it might be in hex.
+ if (cur_char == '0') {
+ is_number = true;
+ // Read the next character to check.
+ if (!out_str.append(cur_char)) {
+ return ALLOCATION_FAILURE;
+ }
+ // If we've hit the end, then this is "0", which is valid.
+ if (out_str.length() == max_width) {
+ write_float_with_length(out_str.c_str(), to_conv);
+ return READ_OK;
+ } else {
+ cur_char = reader->getc();
+ }
+
+ // If that next character is an 'x' then this is a hexadecimal number.
+ if (internal::tolower(cur_char) == 'x') {
+ base = 16;
+
+ if (!out_str.append(cur_char)) {
+ return ALLOCATION_FAILURE;
+ }
+ // If we've hit the end here, we have "0x" which is a valid prefix to a
+ // floating point number, and will be evaluated to 0.
+ if (out_str.length() == max_width) {
+ write_float_with_length(out_str.c_str(), to_conv);
+ return READ_OK;
+ } else {
+ cur_char = reader->getc();
+ }
+ }
+ }
+
+ const char exponent_mark = ((base == 10) ? 'e' : 'p');
+ bool after_decimal = false;
+
+ // The format for the remaining characters at this point is DD.DDe+/-DD for
+ // base 10 and XX.XXp+/-DD for base 16
+
+ // This handles the digits before and after the decimal point, but not the
+ // exponent.
+ while (out_str.length() < max_width) {
+ if (internal::isalnum(cur_char) &&
+ internal::b36_char_to_int(cur_char) < base) {
+ is_number = true;
+ if (!out_str.append(cur_char)) {
+ return ALLOCATION_FAILURE;
+ }
+ cur_char = reader->getc();
+ } else if (cur_char == DECIMAL_POINT && !after_decimal) {
+ after_decimal = true;
+ if (!out_str.append(cur_char)) {
+ return ALLOCATION_FAILURE;
+ }
+ cur_char = reader->getc();
+ } else {
+ break;
+ }
+ }
+
+ // Handle the exponent, which has an exponent mark, an optional sign, and
+ // decimal digits.
+ if (internal::tolower(cur_char) == exponent_mark) {
+ if (!out_str.append(cur_char)) {
+ return ALLOCATION_FAILURE;
+ }
+ if (out_str.length() == max_width) {
+ // This is laid out in the standard as being a matching error (100e is not
+ // a valid float) but may conflict with existing implementations.
+ return MATCHING_FAILURE;
+ } else {
+ cur_char = reader->getc();
+ }
+
+ if (cur_char == '+' || cur_char == '-') {
+ if (!out_str.append(cur_char)) {
+ return ALLOCATION_FAILURE;
+ }
+ if (out_str.length() == max_width) {
+ return MATCHING_FAILURE;
+ } else {
+ cur_char = reader->getc();
+ }
+ }
+
+ // It is specified by the standard that "100er" is a matching failure since
+ // the longest prefix of a possibly valid floating-point number (which is
+ // "100e") is not a valid floating-point number. If there is an exponent
+ // mark then there must be a digit after it else the number is not valid.
+ // Some implementations will roll back two characters (to just "100") and
+ // accept that since the prefix is not valid, and some will interpret an
+ // exponent mark followed by no digits as an additional exponent of 0
+ // (accepting "100e" and returning 100.0). Both of these behaviors are wrong
+ // by the standard, but they may be used in real code, see Hyrum's law. This
+ // code follows the standard, but may be incompatible due to code expecting
+ // these bugs.
+ if (!internal::isdigit(cur_char)) {
+ return MATCHING_FAILURE;
+ }
+
+ while (internal::isdigit(cur_char) && out_str.length() < max_width) {
+ if (!out_str.append(cur_char)) {
+ return ALLOCATION_FAILURE;
+ }
+ cur_char = reader->getc();
+ }
+ }
+
+ // We always read one more character than will be used, so we have to put the
+ // last one back.
+ reader->ungetc(cur_char);
+
+ // If we haven't actually found any digits, this is a matching failure (this
+ // catches cases like "+.")
+ if (!is_number) {
+ return MATCHING_FAILURE;
+ }
+ write_float_with_length(out_str.c_str(), to_conv);
+
+ return READ_OK;
+}
} // namespace scanf_core
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/int_converter.cpp b/libc/src/stdio/scanf_core/int_converter.cpp
deleted file mode 100644
index fce817245c010..0000000000000
--- a/libc/src/stdio/scanf_core/int_converter.cpp
+++ /dev/null
@@ -1,230 +0,0 @@
-//===-- Int type specifier converters for scanf -----------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/stdio/scanf_core/int_converter.h"
-
-#include "src/__support/CPP/limits.h"
-#include "src/__support/ctype_utils.h"
-#include "src/__support/macros/config.h"
-#include "src/stdio/scanf_core/converter_utils.h"
-#include "src/stdio/scanf_core/core_structs.h"
-#include "src/stdio/scanf_core/reader.h"
-
-#include <stddef.h>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace scanf_core {
-
-// This code is very similar to the code in __support/str_to_integer.h but is
-// not quite the same. Here is the list of differences and why they exist:
-// 1) This takes a reader and a format section instead of a char* and the base.
-// This should be fairly self explanatory. While the char* could be adapted
-// to a reader and the base could be calculated ahead of time, the
-// semantics are slightly different, specifically a char* can be indexed
-// freely (I can read str[2] and then str[0]) whereas a File (which the
-// reader may contain) cannot.
-// 2) Because this uses a Reader, this function can only unget once.
-// This is relevant because scanf specifies it reads the "longest sequence
-// of input characters which does not exceed any specified field width and
-// which is, or is a prefix of, a matching input sequence." Whereas the
-// strtol function accepts "the longest initial subsequence of the input
-// string (...) that is of the expected form." This is demonstrated by the
-// differences in how they deal with the string "0xZZZ" when parsing as
-// hexadecimal. Scanf will read the "0x" as a valid prefix and return 0,
-// since it reads the first 'Z', sees that it's not a valid hex digit, and
-// reverses one character. The strtol function on the other hand only
-// accepts the "0" since that's the longest valid hexadecimal sequence. It
-// sees the 'Z' after the "0x" and determines that this is not the prefix
-// to a valid hex string.
-// 3) This conversion may have a maximum width.
-// If a maximum width is specified, this conversion is only allowed to
-// accept a certain number of characters. Strtol doesn't have any such
-// limitation.
-int convert_int(Reader *reader, const FormatSection &to_conv) {
- // %d "Matches an optionally signed decimal integer [...] with the value 10
- // for the base argument. The corresponding argument shall be a pointer to
- // signed integer."
-
- // %i "Matches an optionally signed integer [...] with the value 0 for the
- // base argument. The corresponding argument shall be a pointer to signed
- // integer."
-
- // %u "Matches an optionally signed decimal integer [...] with the value 10
- // for the base argument. The corresponding argument shall be a pointer to
- // unsigned integer"
-
- // %o "Matches an optionally signed octal integer [...] with the value 8 for
- // the base argument. The corresponding argument shall be a pointer to
- // unsigned integer"
-
- // %x/X "Matches an optionally signed hexadecimal integer [...] with the value
- // 16 for the base argument. The corresponding argument shall be a pointer to
- // unsigned integer"
-
- size_t max_width = cpp::numeric_limits<size_t>::max();
- if (to_conv.max_width > 0) {
- max_width = to_conv.max_width;
- }
-
- uintmax_t result = 0;
- bool is_number = false;
- bool is_signed = false;
- int base = 0;
- if (to_conv.conv_name == 'i') {
- base = 0;
- is_signed = true;
- } else if (to_conv.conv_name == 'o') {
- base = 8;
- } else if (internal::tolower(to_conv.conv_name) == 'x' ||
- to_conv.conv_name == 'p') {
- base = 16;
- } else if (to_conv.conv_name == 'd') {
- base = 10;
- is_signed = true;
- } else { // conv_name must be 'u'
- base = 10;
- }
-
- char cur_char = reader->getc();
-
- char result_sign = '+';
- if (cur_char == '+' || cur_char == '-') {
- result_sign = cur_char;
- if (max_width > 1) {
- --max_width;
- cur_char = reader->getc();
- } else {
- // If the max width has been hit already, then the return value must be 0
- // since no actual digits of the number have been parsed yet.
- write_int_with_length(0, to_conv);
- return MATCHING_FAILURE;
- }
- }
- const bool is_negative = result_sign == '-';
-
- // Base of 0 means automatically determine the base. Base of 16 may have a
- // prefix of "0x"
- if (base == 0 || base == 16) {
- // If the first character is 0, then it could be octal or hex.
- if (cur_char == '0') {
- is_number = true;
-
- // Read the next character to check.
- if (max_width > 1) {
- --max_width;
- cur_char = reader->getc();
- } else {
- write_int_with_length(0, to_conv);
- return READ_OK;
- }
-
- if (internal::tolower(cur_char) == 'x') {
- // This is a valid hex prefix.
-
- is_number = false;
- // A valid hex prefix is not necessarily a valid number. For the
- // conversion to be valid it needs to use all of the characters it
- // consumes. From the standard:
- // 7.23.6.2 paragraph 9: "An input item is defined as the longest
- // sequence of input characters which does not exceed any specified
- // field width and which is, or is a prefix of, a matching input
- // sequence."
- // 7.23.6.2 paragraph 10: "If the input item is not a matching sequence,
- // the execution of the directive fails: this condition is a matching
- // failure"
- base = 16;
- if (max_width > 1) {
- --max_width;
- cur_char = reader->getc();
- } else {
- return MATCHING_FAILURE;
- }
-
- } else {
- if (base == 0) {
- base = 8;
- }
- }
- } else if (base == 0) {
- if (internal::isdigit(cur_char)) {
- // If the first character is a different number, then it's 10.
- base = 10;
- } else {
- // If the first character isn't a valid digit, then there are no valid
- // digits at all. The number is 0.
- reader->ungetc(cur_char);
- write_int_with_length(0, to_conv);
- return MATCHING_FAILURE;
- }
- }
- }
-
- constexpr uintmax_t UNSIGNED_MAX = cpp::numeric_limits<uintmax_t>::max();
- constexpr uintmax_t SIGNED_MAX =
- static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max());
- constexpr uintmax_t NEGATIVE_SIGNED_MAX =
- static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max()) + 1;
-
- const uintmax_t MAX =
- (is_signed ? (is_negative ? NEGATIVE_SIGNED_MAX : SIGNED_MAX)
- : UNSIGNED_MAX);
-
- const uintmax_t max_div_by_base = MAX / base;
-
- if (internal::isalnum(cur_char) &&
- internal::b36_char_to_int(cur_char) < base) {
- is_number = true;
- }
-
- bool has_overflow = false;
- size_t i = 0;
- for (; i < max_width && internal::isalnum(cur_char) &&
- internal::b36_char_to_int(cur_char) < base;
- ++i, cur_char = reader->getc()) {
-
- uintmax_t cur_digit = internal::b36_char_to_int(cur_char);
-
- if (result == MAX) {
- has_overflow = true;
- continue;
- } else if (result > max_div_by_base) {
- result = MAX;
- has_overflow = true;
- } else {
- result = result * base;
- }
-
- if (result > MAX - cur_digit) {
- result = MAX;
- has_overflow = true;
- } else {
- result = result + cur_digit;
- }
- }
-
- // We always read one more character than will be used, so we have to put the
- // last one back.
- reader->ungetc(cur_char);
-
- if (!is_number)
- return MATCHING_FAILURE;
-
- if (has_overflow) {
- write_int_with_length(MAX, to_conv);
- } else {
- if (is_negative)
- result = -result;
-
- write_int_with_length(result, to_conv);
- }
-
- return READ_OK;
-}
-
-} // namespace scanf_core
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/int_converter.h b/libc/src/stdio/scanf_core/int_converter.h
index 5fc27ad0faafc..35f11d67d4701 100644
--- a/libc/src/stdio/scanf_core/int_converter.h
+++ b/libc/src/stdio/scanf_core/int_converter.h
@@ -9,7 +9,10 @@
#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_INT_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_INT_CONVERTER_H
+#include "src/__support/CPP/limits.h"
+#include "src/__support/ctype_utils.h"
#include "src/__support/macros/config.h"
+#include "src/stdio/scanf_core/converter_utils.h"
#include "src/stdio/scanf_core/core_structs.h"
#include "src/stdio/scanf_core/reader.h"
@@ -18,7 +21,212 @@
namespace LIBC_NAMESPACE_DECL {
namespace scanf_core {
-int convert_int(Reader *reader, const FormatSection &to_conv);
+// This code is very similar to the code in __support/str_to_integer.h but is
+// not quite the same. Here is the list of differences and why they exist:
+// 1) This takes a reader and a format section instead of a char* and the base.
+// This should be fairly self explanatory. While the char* could be adapted
+// to a reader and the base could be calculated ahead of time, the
+// semantics are slightly different, specifically a char* can be indexed
+// freely (I can read str[2] and then str[0]) whereas a File (which the
+// reader may contain) cannot.
+// 2) Because this uses a Reader, this function can only unget once.
+// This is relevant because scanf specifies it reads the "longest sequence
+// of input characters which does not exceed any specified field width and
+// which is, or is a prefix of, a matching input sequence." Whereas the
+// strtol function accepts "the longest initial subsequence of the input
+// string (...) that is of the expected form." This is demonstrated by the
+// differences in how they deal with the string "0xZZZ" when parsing as
+// hexadecimal. Scanf will read the "0x" as a valid prefix and return 0,
+// since it reads the first 'Z', sees that it's not a valid hex digit, and
+// reverses one character. The strtol function on the other hand only
+// accepts the "0" since that's the longest valid hexadecimal sequence. It
+// sees the 'Z' after the "0x" and determines that this is not the prefix
+// to a valid hex string.
+// 3) This conversion may have a maximum width.
+// If a maximum width is specified, this conversion is only allowed to
+// accept a certain number of characters. Strtol doesn't have any such
+// limitation.
+template <typename T>
+int convert_int(Reader<T> *reader, const FormatSection &to_conv) {
+ // %d "Matches an optionally signed decimal integer [...] with the value 10
+ // for the base argument. The corresponding argument shall be a pointer to
+ // signed integer."
+
+ // %i "Matches an optionally signed integer [...] with the value 0 for the
+ // base argument. The corresponding argument shall be a pointer to signed
+ // integer."
+
+ // %u "Matches an optionally signed decimal integer [...] with the value 10
+ // for the base argument. The corresponding argument shall be a pointer to
+ // unsigned integer"
+
+ // %o "Matches an optionally signed octal integer [...] with the value 8 for
+ // the base argument. The corresponding argument shall be a pointer to
+ // unsigned integer"
+
+ // %x/X "Matches an optionally signed hexadecimal integer [...] with the value
+ // 16 for the base argument. The corresponding argument shall be a pointer to
+ // unsigned integer"
+
+ size_t max_width = cpp::numeric_limits<size_t>::max();
+ if (to_conv.max_width > 0) {
+ max_width = to_conv.max_width;
+ }
+
+ uintmax_t result = 0;
+ bool is_number = false;
+ bool is_signed = false;
+ int base = 0;
+ if (to_conv.conv_name == 'i') {
+ base = 0;
+ is_signed = true;
+ } else if (to_conv.conv_name == 'o') {
+ base = 8;
+ } else if (internal::tolower(to_conv.conv_name) == 'x' ||
+ to_conv.conv_name == 'p') {
+ base = 16;
+ } else if (to_conv.conv_name == 'd') {
+ base = 10;
+ is_signed = true;
+ } else { // conv_name must be 'u'
+ base = 10;
+ }
+
+ char cur_char = reader->getc();
+
+ char result_sign = '+';
+ if (cur_char == '+' || cur_char == '-') {
+ result_sign = cur_char;
+ if (max_width > 1) {
+ --max_width;
+ cur_char = reader->getc();
+ } else {
+ // If the max width has been hit already, then the return value must be 0
+ // since no actual digits of the number have been parsed yet.
+ write_int_with_length(0, to_conv);
+ return MATCHING_FAILURE;
+ }
+ }
+ const bool is_negative = result_sign == '-';
+
+ // Base of 0 means automatically determine the base. Base of 16 may have a
+ // prefix of "0x"
+ if (base == 0 || base == 16) {
+ // If the first character is 0, then it could be octal or hex.
+ if (cur_char == '0') {
+ is_number = true;
+
+ // Read the next character to check.
+ if (max_width > 1) {
+ --max_width;
+ cur_char = reader->getc();
+ } else {
+ write_int_with_length(0, to_conv);
+ return READ_OK;
+ }
+
+ if (internal::tolower(cur_char) == 'x') {
+ // This is a valid hex prefix.
+
+ is_number = false;
+ // A valid hex prefix is not necessarily a valid number. For the
+ // conversion to be valid it needs to use all of the characters it
+ // consumes. From the standard:
+ // 7.23.6.2 paragraph 9: "An input item is defined as the longest
+ // sequence of input characters which does not exceed any specified
+ // field width and which is, or is a prefix of, a matching input
+ // sequence."
+ // 7.23.6.2 paragraph 10: "If the input item is not a matching sequence,
+ // the execution of the directive fails: this condition is a matching
+ // failure"
+ base = 16;
+ if (max_width > 1) {
+ --max_width;
+ cur_char = reader->getc();
+ } else {
+ return MATCHING_FAILURE;
+ }
+
+ } else {
+ if (base == 0) {
+ base = 8;
+ }
+ }
+ } else if (base == 0) {
+ if (internal::isdigit(cur_char)) {
+ // If the first character is a different number, then it's 10.
+ base = 10;
+ } else {
+ // If the first character isn't a valid digit, then there are no valid
+ // digits at all. The number is 0.
+ reader->ungetc(cur_char);
+ write_int_with_length(0, to_conv);
+ return MATCHING_FAILURE;
+ }
+ }
+ }
+
+ constexpr uintmax_t UNSIGNED_MAX = cpp::numeric_limits<uintmax_t>::max();
+ constexpr uintmax_t SIGNED_MAX =
+ static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max());
+ constexpr uintmax_t NEGATIVE_SIGNED_MAX =
+ static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max()) + 1;
+
+ const uintmax_t MAX =
+ (is_signed ? (is_negative ? NEGATIVE_SIGNED_MAX : SIGNED_MAX)
+ : UNSIGNED_MAX);
+
+ const uintmax_t max_div_by_base = MAX / base;
+
+ if (internal::isalnum(cur_char) &&
+ internal::b36_char_to_int(cur_char) < base) {
+ is_number = true;
+ }
+
+ bool has_overflow = false;
+ size_t i = 0;
+ for (; i < max_width && internal::isalnum(cur_char) &&
+ internal::b36_char_to_int(cur_char) < base;
+ ++i, cur_char = reader->getc()) {
+
+ uintmax_t cur_digit = internal::b36_char_to_int(cur_char);
+
+ if (result == MAX) {
+ has_overflow = true;
+ continue;
+ } else if (result > max_div_by_base) {
+ result = MAX;
+ has_overflow = true;
+ } else {
+ result = result * base;
+ }
+
+ if (result > MAX - cur_digit) {
+ result = MAX;
+ has_overflow = true;
+ } else {
+ result = result + cur_digit;
+ }
+ }
+
+ // We always read one more character than will be used, so we have to put the
+ // last one back.
+ reader->ungetc(cur_char);
+
+ if (!is_number)
+ return MATCHING_FAILURE;
+
+ if (has_overflow) {
+ write_int_with_length(MAX, to_conv);
+ } else {
+ if (is_negative)
+ result = -result;
+
+ write_int_with_length(result, to_conv);
+ }
+
+ return READ_OK;
+}
} // namespace scanf_core
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/ptr_converter.cpp b/libc/src/stdio/scanf_core/ptr_converter.cpp
deleted file mode 100644
index 37f002d3da698..0000000000000
--- a/libc/src/stdio/scanf_core/ptr_converter.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-//===-- Int type specifier converters for scanf -----------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/stdio/scanf_core/ptr_converter.h"
-
-#include "src/__support/ctype_utils.h"
-#include "src/__support/macros/config.h"
-#include "src/stdio/scanf_core/converter_utils.h"
-#include "src/stdio/scanf_core/core_structs.h"
-#include "src/stdio/scanf_core/int_converter.h"
-#include "src/stdio/scanf_core/reader.h"
-
-#include <stddef.h>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace scanf_core {
-int convert_pointer(Reader *reader, const FormatSection &to_conv) {
- static const char nullptr_string[] = "(nullptr)";
-
- // Check if it's exactly the nullptr string, if so then it's a nullptr.
- char cur_char = reader->getc();
- size_t i = 0;
- for (; i < (sizeof(nullptr_string) - 1) &&
- internal::tolower(cur_char) == nullptr_string[i];
- ++i) {
- cur_char = reader->getc();
- }
- if (i == (sizeof(nullptr_string) - 1)) {
- *reinterpret_cast<void **>(to_conv.output_ptr) = nullptr;
- return READ_OK;
- } else if (i > 0) {
- return MATCHING_FAILURE;
- }
-
- reader->ungetc(cur_char);
-
- // Else treat it as a hex int
- return convert_int(reader, to_conv);
-}
-} // namespace scanf_core
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/ptr_converter.h b/libc/src/stdio/scanf_core/ptr_converter.h
index 0732c1c0e3284..e74a17eaac4cf 100644
--- a/libc/src/stdio/scanf_core/ptr_converter.h
+++ b/libc/src/stdio/scanf_core/ptr_converter.h
@@ -9,8 +9,10 @@
#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_PTR_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_PTR_CONVERTER_H
+#include "src/__support/ctype_utils.h"
#include "src/__support/macros/config.h"
#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/int_converter.h"
#include "src/stdio/scanf_core/reader.h"
#include <stddef.h>
@@ -18,7 +20,30 @@
namespace LIBC_NAMESPACE_DECL {
namespace scanf_core {
-int convert_pointer(Reader *reader, const FormatSection &to_conv);
+// Converts %p: matches "(nullptr)" case-insensitively, else parses a hex int.
+template <typename T>
+int convert_pointer(Reader<T> *reader, const FormatSection &to_conv) {
+ static const char nullptr_string[] = "(nullptr)";
+
+ // Check if it's exactly the nullptr string, if so then it's a nullptr.
+ char cur_char = reader->getc();
+ size_t i = 0;
+ for (; i < (sizeof(nullptr_string) - 1) &&
+ internal::tolower(cur_char) == nullptr_string[i];
+ ++i) {
+ cur_char = reader->getc();
+ }
+ // We always read one more character than is used, so put the last one back.
+ reader->ungetc(cur_char);
+ if (i == (sizeof(nullptr_string) - 1)) {
+ *reinterpret_cast<void **>(to_conv.output_ptr) = nullptr;
+ return READ_OK;
+ } else if (i > 0) {
+ return MATCHING_FAILURE;
+ }
+ // Else treat it as a hex int
+ return convert_int(reader, to_conv);
+}
} // namespace scanf_core
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/reader.h b/libc/src/stdio/scanf_core/reader.h
index 1f8ec9695a314..21b405491b1fc 100644
--- a/libc/src/stdio/scanf_core/reader.h
+++ b/libc/src/stdio/scanf_core/reader.h
@@ -9,17 +9,6 @@
#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H
-#include "hdr/types/FILE.h"
-
-#ifndef LIBC_COPT_STDIO_USE_SYSTEM_FILE
-#include "src/__support/File/file.h"
-#endif
-
-#if defined(LIBC_TARGET_ARCH_IS_GPU)
-#include "src/stdio/getc.h"
-#include "src/stdio/ungetc.h"
-#endif
-
#include "src/__support/macros/attributes.h" // For LIBC_INLINE
#include "src/__support/macros/config.h"
@@ -27,103 +16,58 @@
namespace LIBC_NAMESPACE_DECL {
namespace scanf_core {
-// We use the name "reader_internal" over "internal" because
-// "internal" causes name lookups in files that include the current header to be
-// ambigious i.e. `internal::foo` in those files, will try to lookup in
-// `LIBC_NAMESPACE::scanf_core::internal` over `LIBC_NAMESPACE::internal` for
-// e.g., `internal::ArgList` in `libc/src/stdio/scanf_core/scanf_main.h`
-namespace reader_internal {
-
-#if defined(LIBC_TARGET_ARCH_IS_GPU)
-// The GPU build provides FILE access through the host operating system's
-// library. So here we simply use the public entrypoints like in the SYSTEM_FILE
-// interface. Entrypoints should normally not call others, this is an exception.
-// FIXME: We do not acquire any locks here, so this is not thread safe.
-LIBC_INLINE int getc(void *f) {
- return LIBC_NAMESPACE::getc(reinterpret_cast<::FILE *>(f));
-}
-
-LIBC_INLINE void ungetc(int c, void *f) {
- LIBC_NAMESPACE::ungetc(c, reinterpret_cast<::FILE *>(f));
-}
-
-#elif !defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
-
-LIBC_INLINE int getc(void *f) {
- unsigned char c;
- auto result =
- reinterpret_cast<LIBC_NAMESPACE::File *>(f)->read_unlocked(&c, 1);
- size_t r = result.value;
- if (result.has_error() || r != 1)
- return '\0';
-
- return c;
-}
-LIBC_INLINE void ungetc(int c, void *f) {
- reinterpret_cast<LIBC_NAMESPACE::File *>(f)->ungetc_unlocked(c);
-}
-
-#else // defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
-
-// Since ungetc_unlocked isn't always available, we don't acquire the lock for
-// system files.
-LIBC_INLINE int getc(void *f) { return ::getc(reinterpret_cast<::FILE *>(f)); }
-
-LIBC_INLINE void ungetc(int c, void *f) {
- ::ungetc(c, reinterpret_cast<::FILE *>(f));
-}
-#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE
-
-} // namespace reader_internal
+template <typename Derived> struct ReadBuffer {
+ LIBC_INLINE char getc() { return static_cast<Derived *>(this)->getc(); }
+ LIBC_INLINE void ungetc(int c) { static_cast<Derived *>(this)->ungetc(c); }
+};
-// This is intended to be either a raw string or a buffer syncronized with the
-// file's internal buffer.
-struct ReadBuffer {
+class StringBuffer : public ReadBuffer<StringBuffer> {
const char *buffer;
- size_t buff_len;
+ [[maybe_unused]] size_t buff_len;
size_t buff_cur = 0;
+
+public:
+ LIBC_INLINE StringBuffer(const char *buffer, size_t buff_len)
+ : buffer(buffer), buff_len(buff_len) {}
+
+ LIBC_INLINE char getc() {
+ char output = buffer[buff_cur];
+ ++buff_cur;
+ return output;
+ }
+ LIBC_INLINE void ungetc(int) {
+ if (buff_cur > 0) {
+ // While technically c should be written back to the buffer, in scanf we
+ // always write the character that was already there. Additionally, the
+ // buffer is most likely to contain a string that isn't part of a file,
+ // which may not be writable.
+ --buff_cur;
+ }
+ }
};
-class Reader {
- ReadBuffer *rb;
- void *input_stream = nullptr;
+// TODO: We should be able to fold ReadBuffer into Reader.
+template <typename T> class Reader {
+ ReadBuffer<T> *buffer;
size_t cur_chars_read = 0;
public:
- // TODO: Set buff_len with a proper constant
- LIBC_INLINE Reader(ReadBuffer *string_buffer) : rb(string_buffer) {}
-
- LIBC_INLINE Reader(void *stream, ReadBuffer *stream_buffer = nullptr)
- : rb(stream_buffer), input_stream(stream) {}
+ LIBC_INLINE Reader(ReadBuffer<T> *buffer) : buffer(buffer) {}
// This returns the next character from the input and advances it by one
// character. When it hits the end of the string or file it returns '\0' to
// signal to stop parsing.
LIBC_INLINE char getc() {
++cur_chars_read;
- if (rb != nullptr) {
- char output = rb->buffer[rb->buff_cur];
- ++(rb->buff_cur);
- return output;
- }
- // This should reset the buffer if applicable.
- return static_cast<char>(reader_internal::getc(input_stream));
+ return buffer->getc();
}
// This moves the input back by one character, placing c into the buffer if
// this is a file reader, else c is ignored.
LIBC_INLINE void ungetc(char c) {
--cur_chars_read;
- if (rb != nullptr && rb->buff_cur > 0) {
- // While technically c should be written back to the buffer, in scanf we
- // always write the character that was already there. Additionally, the
- // buffer is most likely to contain a string that isn't part of a file,
- // which may not be writable.
- --(rb->buff_cur);
- return;
- }
- reader_internal::ungetc(static_cast<int>(c), input_stream);
+ buffer->ungetc(c);
}
LIBC_INLINE size_t chars_read() { return cur_chars_read; }
diff --git a/libc/src/stdio/scanf_core/scanf_main.cpp b/libc/src/stdio/scanf_core/scanf_main.cpp
deleted file mode 100644
index eb480943aeeda..0000000000000
--- a/libc/src/stdio/scanf_core/scanf_main.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-//===-- Starting point for scanf --------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/stdio/scanf_core/scanf_main.h"
-
-#include "src/__support/arg_list.h"
-#include "src/__support/macros/config.h"
-#include "src/stdio/scanf_core/converter.h"
-#include "src/stdio/scanf_core/core_structs.h"
-#include "src/stdio/scanf_core/parser.h"
-#include "src/stdio/scanf_core/reader.h"
-
-#include <stddef.h>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace scanf_core {
-
-int scanf_main(Reader *reader, const char *__restrict str,
- internal::ArgList &args) {
- Parser<internal::ArgList> parser(str, args);
- int ret_val = READ_OK;
- int conversions = 0;
- for (FormatSection cur_section = parser.get_next_section();
- !cur_section.raw_string.empty() && ret_val == READ_OK;
- cur_section = parser.get_next_section()) {
- if (cur_section.has_conv) {
- ret_val = convert(reader, cur_section);
- // The %n (current position) conversion doesn't increment the number of
- // assignments.
- if (cur_section.conv_name != 'n')
- conversions += ret_val == READ_OK ? 1 : 0;
- } else {
- ret_val = raw_match(reader, cur_section.raw_string);
- }
- }
-
- return conversions;
-}
-
-} // namespace scanf_core
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/scanf_main.h b/libc/src/stdio/scanf_core/scanf_main.h
index 27c246933dceb..f975d85c16f8a 100644
--- a/libc/src/stdio/scanf_core/scanf_main.h
+++ b/libc/src/stdio/scanf_core/scanf_main.h
@@ -11,6 +11,9 @@
#include "src/__support/arg_list.h"
#include "src/__support/macros/config.h"
+#include "src/stdio/scanf_core/converter.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/parser.h"
#include "src/stdio/scanf_core/reader.h"
#include <stddef.h>
@@ -18,8 +21,28 @@
namespace LIBC_NAMESPACE_DECL {
namespace scanf_core {
-int scanf_main(Reader *reader, const char *__restrict str,
- internal::ArgList &args);
+template <typename T>
+int scanf_main(Reader<T> *reader, const char *__restrict str,
+ internal::ArgList &args) {
+ Parser<internal::ArgList> parser(str, args);
+ int ret_val = READ_OK;
+ int conversions = 0;
+ for (FormatSection cur_section = parser.get_next_section();
+ !cur_section.raw_string.empty() && ret_val == READ_OK;
+ cur_section = parser.get_next_section()) {
+ if (cur_section.has_conv) {
+ ret_val = convert(reader, cur_section);
+ // The %n (current position) conversion doesn't increment the number of
+ // assignments.
+ if (cur_section.conv_name != 'n')
+ conversions += ret_val == READ_OK ? 1 : 0;
+ } else {
+ ret_val = raw_match(reader, cur_section.raw_string);
+ }
+ }
+
+ return conversions;
+}
} // namespace scanf_core
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/string_converter.cpp b/libc/src/stdio/scanf_core/string_converter.cpp
deleted file mode 100644
index 0de2eeed2f5e4..0000000000000
--- a/libc/src/stdio/scanf_core/string_converter.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-//===-- String type specifier converters for scanf --------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/stdio/scanf_core/string_converter.h"
-
-#include "src/__support/CPP/limits.h"
-#include "src/__support/ctype_utils.h"
-#include "src/__support/macros/config.h"
-#include "src/stdio/scanf_core/core_structs.h"
-#include "src/stdio/scanf_core/reader.h"
-
-#include <stddef.h>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace scanf_core {
-
-int convert_string(Reader *reader, const FormatSection &to_conv) {
- // %s "Matches a sequence of non-white-space characters"
-
- // %c "Matches a sequence of characters of exactly the number specified by the
- // field width (1 if no field width is present in the directive)"
-
- // %[ "Matches a nonempty sequence of characters from a set of expected
- // characters (the scanset)."
- size_t max_width = 0;
- if (to_conv.max_width > 0) {
- max_width = to_conv.max_width;
- } else {
- if (to_conv.conv_name == 'c') {
- max_width = 1;
- } else {
- max_width = cpp::numeric_limits<size_t>::max();
- }
- }
-
- char *output = reinterpret_cast<char *>(to_conv.output_ptr);
-
- char cur_char = reader->getc();
- size_t i = 0;
- for (; i < max_width && cur_char != '\0'; ++i) {
- // If this is %s and we've hit a space, or if this is %[] and we've found
- // something not in the scanset.
- if ((to_conv.conv_name == 's' && internal::isspace(cur_char)) ||
- (to_conv.conv_name == '[' && !to_conv.scan_set.test(cur_char))) {
- break;
- }
- // if the NO_WRITE flag is not set, write to the output.
- if ((to_conv.flags & NO_WRITE) == 0)
- output[i] = cur_char;
- cur_char = reader->getc();
- }
-
- // We always read one more character than will be used, so we have to put the
- // last one back.
- reader->ungetc(cur_char);
-
- // If this is %s or %[]
- if (to_conv.conv_name != 'c' && (to_conv.flags & NO_WRITE) == 0) {
- // Always null terminate the string. This may cause a write to the
- // (max_width + 1) byte, which is correct. The max width describes the max
- // number of characters read from the input string, and doesn't necessarily
- // correspond to the output.
- output[i] = '\0';
- }
-
- if (i == 0)
- return MATCHING_FAILURE;
- return READ_OK;
-}
-
-} // namespace scanf_core
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/string_converter.h b/libc/src/stdio/scanf_core/string_converter.h
index 552dc22a502f5..3879f8c995899 100644
--- a/libc/src/stdio/scanf_core/string_converter.h
+++ b/libc/src/stdio/scanf_core/string_converter.h
@@ -9,6 +9,8 @@
#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H
+#include "src/__support/CPP/limits.h"
+#include "src/__support/ctype_utils.h"
#include "src/__support/macros/config.h"
#include "src/stdio/scanf_core/core_structs.h"
#include "src/stdio/scanf_core/reader.h"
@@ -18,7 +20,60 @@
namespace LIBC_NAMESPACE_DECL {
namespace scanf_core {
-int convert_string(Reader *reader, const FormatSection &to_conv);
+template <typename T>
+int convert_string(Reader<T> *reader, const FormatSection &to_conv) {
+ // %s "Matches a sequence of non-white-space characters"
+
+ // %c "Matches a sequence of characters of exactly the number specified by the
+ // field width (1 if no field width is present in the directive)"
+
+ // %[ "Matches a nonempty sequence of characters from a set of expected
+ // characters (the scanset)."
+ size_t max_width = 0;
+ if (to_conv.max_width > 0) {
+ max_width = to_conv.max_width;
+ } else {
+ if (to_conv.conv_name == 'c') {
+ max_width = 1;
+ } else {
+ max_width = cpp::numeric_limits<size_t>::max();
+ }
+ }
+
+ char *output = reinterpret_cast<char *>(to_conv.output_ptr);
+
+ char cur_char = reader->getc();
+ size_t i = 0;
+ for (; i < max_width && cur_char != '\0'; ++i) {
+ // If this is %s and we've hit a space, or if this is %[] and we've found
+ // something not in the scanset.
+ if ((to_conv.conv_name == 's' && internal::isspace(cur_char)) ||
+ (to_conv.conv_name == '[' && !to_conv.scan_set.test(cur_char))) {
+ break;
+ }
+ // if the NO_WRITE flag is not set, write to the output.
+ if ((to_conv.flags & NO_WRITE) == 0)
+ output[i] = cur_char;
+ cur_char = reader->getc();
+ }
+
+ // We always read one more character than will be used, so we have to put the
+ // last one back.
+ reader->ungetc(cur_char);
+
+ // If this is %s or %[]
+ if (to_conv.conv_name != 'c' && (to_conv.flags & NO_WRITE) == 0) {
+ // Always null terminate the string. This may cause a write to the
+ // (max_width + 1) byte, which is correct. The max width describes the max
+ // number of characters read from the input string, and doesn't necessarily
+ // correspond to the output.
+ output[i] = '\0';
+ }
+
+ if (i == 0)
+ return MATCHING_FAILURE;
+ return READ_OK;
+}
} // namespace scanf_core
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/vfscanf_internal.h b/libc/src/stdio/scanf_core/vfscanf_internal.h
index 4e20fa3b93091..46f2675504c6b 100644
--- a/libc/src/stdio/scanf_core/vfscanf_internal.h
+++ b/libc/src/stdio/scanf_core/vfscanf_internal.h
@@ -38,6 +38,10 @@ LIBC_INLINE void funlockfile(::FILE *) { return; }
LIBC_INLINE int ferror_unlocked(::FILE *f) { return LIBC_NAMESPACE::ferror(f); }
+LIBC_INLINE int getc(::FILE *f) { return LIBC_NAMESPACE::getc(f); }
+
+LIBC_INLINE void ungetc(int c, ::FILE *f) { LIBC_NAMESPACE::ungetc(c, f); }
+
#elif !defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
LIBC_INLINE void flockfile(FILE *f) {
@@ -52,6 +56,21 @@ LIBC_INLINE int ferror_unlocked(FILE *f) {
return reinterpret_cast<LIBC_NAMESPACE::File *>(f)->error_unlocked();
}
+LIBC_INLINE int getc(FILE *f) {
+ unsigned char c;
+ auto result =
+ reinterpret_cast<LIBC_NAMESPACE::File *>(f)->read_unlocked(&c, 1);
+ size_t r = result.value;
+ if (result.has_error() || r != 1)
+ return '\0';
+
+ return c;
+}
+
+LIBC_INLINE void ungetc(int c, FILE *f) {
+ reinterpret_cast<LIBC_NAMESPACE::File *>(f)->ungetc_unlocked(c);
+}
+
#else // defined(LIBC_COPT_STDIO_USE_SYSTEM_FILE)
// Since ungetc_unlocked isn't always available, we don't acquire the lock for
@@ -62,17 +81,36 @@ LIBC_INLINE void funlockfile(::FILE *) { return; }
LIBC_INLINE int ferror_unlocked(::FILE *f) { return ::ferror(f); }
+LIBC_INLINE int getc(::FILE *f) { return ::getc(f); }
+
+LIBC_INLINE void ungetc(int c, ::FILE *f) { ::ungetc(c, f); }
+
#endif // LIBC_COPT_STDIO_USE_SYSTEM_FILE
} // namespace internal
namespace scanf_core {
+class StreamBuffer : public ReadBuffer<StreamBuffer> {
+ ::FILE *stream;
+
+public:
+ LIBC_INLINE StreamBuffer(::FILE *stream) : stream(stream) {}
+
+ LIBC_INLINE char getc() {
+ return static_cast<char>(internal::getc(static_cast<FILE *>(stream)));
+ }
+ LIBC_INLINE void ungetc(int c) {
+ internal::ungetc(c, static_cast<FILE *>(stream));
+ }
+};
+
LIBC_INLINE int vfscanf_internal(::FILE *__restrict stream,
const char *__restrict format,
internal::ArgList &args) {
internal::flockfile(stream);
- scanf_core::Reader reader(stream);
+ scanf_core::StreamBuffer buffer(stream);
+ scanf_core::Reader<scanf_core::StreamBuffer> reader(&buffer);
int retval = scanf_core::scanf_main(&reader, format, args);
if (retval == 0 && internal::ferror_unlocked(stream))
retval = EOF;
diff --git a/libc/src/stdio/sscanf.cpp b/libc/src/stdio/sscanf.cpp
index 82de8a29f6ad1..ec4e10caf6cb5 100644
--- a/libc/src/stdio/sscanf.cpp
+++ b/libc/src/stdio/sscanf.cpp
@@ -29,8 +29,8 @@ LLVM_LIBC_FUNCTION(int, sscanf,
// and pointer semantics, as well as handling
// destruction automatically.
va_end(vlist);
- scanf_core::ReadBuffer rb{buffer, cpp::numeric_limits<size_t>::max()};
- scanf_core::Reader reader(&rb);
+ scanf_core::StringBuffer rb(buffer, cpp::numeric_limits<size_t>::max());
+ scanf_core::Reader<scanf_core::StringBuffer> reader(&rb);
int ret_val = scanf_core::scanf_main(&reader, format, args);
// This is done to avoid including stdio.h in the internals. On most systems
// EOF is -1, so this will be transformed into just "return ret_val".
diff --git a/libc/src/stdio/vsscanf.cpp b/libc/src/stdio/vsscanf.cpp
index f3f56bce64292..e3e2fe34c32ec 100644
--- a/libc/src/stdio/vsscanf.cpp
+++ b/libc/src/stdio/vsscanf.cpp
@@ -21,9 +21,9 @@ namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(int, vsscanf,
(const char *buffer, const char *format, va_list vlist)) {
internal::ArgList args(vlist);
- scanf_core::ReadBuffer rb{const_cast<char *>(buffer),
- cpp::numeric_limits<size_t>::max()};
- scanf_core::Reader reader(&rb);
+ scanf_core::StringBuffer rb{const_cast<char *>(buffer),
+ cpp::numeric_limits<size_t>::max()};
+ scanf_core::Reader<scanf_core::StringBuffer> reader(&rb);
int ret_val = scanf_core::scanf_main(&reader, format, args);
// This is done to avoid including stdio.h in the internals. On most systems
// EOF is -1, so this will be transformed into just "return ret_val".
diff --git a/libc/test/src/stdio/scanf_core/converter_test.cpp b/libc/test/src/stdio/scanf_core/converter_test.cpp
index d1aecd4c6ba06..962b33ff748b3 100644
--- a/libc/test/src/stdio/scanf_core/converter_test.cpp
+++ b/libc/test/src/stdio/scanf_core/converter_test.cpp
@@ -15,8 +15,9 @@
TEST(LlvmLibcScanfConverterTest, RawMatchBasic) {
const char *str = "abcdef";
- LIBC_NAMESPACE::scanf_core::ReadBuffer str_reader{str, sizeof(str)};
- LIBC_NAMESPACE::scanf_core::Reader reader(&str_reader);
+ LIBC_NAMESPACE::scanf_core::StringBuffer str_reader{str, sizeof(str)};
+ LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
+ reader(&str_reader);
// Reading "abc" should succeed.
ASSERT_EQ(LIBC_NAMESPACE::scanf_core::raw_match(&reader, "abc"),
@@ -51,8 +52,9 @@ TEST(LlvmLibcScanfConverterTest, RawMatchBasic) {
TEST(LlvmLibcScanfConverterTest, RawMatchSpaces) {
const char *str = " a \t\n b cd";
- LIBC_NAMESPACE::scanf_core::ReadBuffer str_reader{str, sizeof(str)};
- LIBC_NAMESPACE::scanf_core::Reader reader(&str_reader);
+ LIBC_NAMESPACE::scanf_core::StringBuffer str_reader{str, sizeof(str)};
+ LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
+ reader(&str_reader);
// Reading "a" should fail and not advance.
// Since there's nothing in the format string (the second argument to
@@ -98,8 +100,9 @@ TEST(LlvmLibcScanfConverterTest, RawMatchSpaces) {
TEST(LlvmLibcScanfConverterTest, StringConvSimple) {
const char *str = "abcDEF123 654LKJihg";
char result[20];
- LIBC_NAMESPACE::scanf_core::ReadBuffer str_reader{str, sizeof(str)};
- LIBC_NAMESPACE::scanf_core::Reader reader(&str_reader);
+ LIBC_NAMESPACE::scanf_core::StringBuffer str_reader{str, sizeof(str)};
+ LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
+ reader(&str_reader);
LIBC_NAMESPACE::scanf_core::FormatSection conv;
conv.has_conv = true;
@@ -120,8 +123,9 @@ TEST(LlvmLibcScanfConverterTest, StringConvSimple) {
TEST(LlvmLibcScanfConverterTest, StringConvNoWrite) {
const char *str = "abcDEF123 654LKJihg";
- LIBC_NAMESPACE::scanf_core::ReadBuffer str_reader{str, sizeof(str)};
- LIBC_NAMESPACE::scanf_core::Reader reader(&str_reader);
+ LIBC_NAMESPACE::scanf_core::StringBuffer str_reader{str, sizeof(str)};
+ LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
+ reader(&str_reader);
LIBC_NAMESPACE::scanf_core::FormatSection conv;
conv.has_conv = true;
@@ -141,8 +145,9 @@ TEST(LlvmLibcScanfConverterTest, StringConvNoWrite) {
TEST(LlvmLibcScanfConverterTest, StringConvWidth) {
const char *str = "abcDEF123 654LKJihg";
char result[6];
- LIBC_NAMESPACE::scanf_core::ReadBuffer str_reader{str, sizeof(str)};
- LIBC_NAMESPACE::scanf_core::Reader reader(&str_reader);
+ LIBC_NAMESPACE::scanf_core::StringBuffer str_reader{str, sizeof(str)};
+ LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
+ reader(&str_reader);
LIBC_NAMESPACE::scanf_core::FormatSection conv;
conv.has_conv = true;
@@ -175,8 +180,9 @@ TEST(LlvmLibcScanfConverterTest, StringConvWidth) {
TEST(LlvmLibcScanfConverterTest, CharsConv) {
const char *str = "abcDEF123 654LKJihg MNOpqr&*(";
char result[20];
- LIBC_NAMESPACE::scanf_core::ReadBuffer str_reader{str, sizeof(str)};
- LIBC_NAMESPACE::scanf_core::Reader reader(&str_reader);
+ LIBC_NAMESPACE::scanf_core::StringBuffer str_reader{str, sizeof(str)};
+ LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
+ reader(&str_reader);
LIBC_NAMESPACE::scanf_core::FormatSection conv;
conv.has_conv = true;
@@ -230,8 +236,9 @@ TEST(LlvmLibcScanfConverterTest, CharsConv) {
TEST(LlvmLibcScanfConverterTest, ScansetConv) {
const char *str = "abcDEF[123] 654LKJihg";
char result[20];
- LIBC_NAMESPACE::scanf_core::ReadBuffer str_reader{str, sizeof(str)};
- LIBC_NAMESPACE::scanf_core::Reader reader(&str_reader);
+ LIBC_NAMESPACE::scanf_core::StringBuffer str_reader{str, sizeof(str)};
+ LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
+ reader(&str_reader);
LIBC_NAMESPACE::scanf_core::FormatSection conv;
conv.has_conv = true;
diff --git a/libc/test/src/stdio/scanf_core/reader_test.cpp b/libc/test/src/stdio/scanf_core/reader_test.cpp
index 43a14184c7650..787185d8cb9f9 100644
--- a/libc/test/src/stdio/scanf_core/reader_test.cpp
+++ b/libc/test/src/stdio/scanf_core/reader_test.cpp
@@ -15,14 +15,16 @@ TEST(LlvmLibcScanfStringReaderTest, Constructor) {
char str[10];
// buff_len justneeds to be a big number. The specific value isn't important
// in the real world.
- LIBC_NAMESPACE::scanf_core::ReadBuffer rb{const_cast<char *>(str), 1000000};
- LIBC_NAMESPACE::scanf_core::Reader reader(&rb);
+ LIBC_NAMESPACE::scanf_core::StringBuffer rb{const_cast<char *>(str), 1000000};
+ LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
+ reader(&rb);
}
TEST(LlvmLibcScanfStringReaderTest, SimpleRead) {
const char *str = "abc";
- LIBC_NAMESPACE::scanf_core::ReadBuffer rb{const_cast<char *>(str), 1000000};
- LIBC_NAMESPACE::scanf_core::Reader reader(&rb);
+ LIBC_NAMESPACE::scanf_core::StringBuffer rb{const_cast<char *>(str), 1000000};
+ LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
+ reader(&rb);
for (size_t i = 0; i < sizeof("abc"); ++i) {
ASSERT_EQ(str[i], reader.getc());
@@ -31,8 +33,9 @@ TEST(LlvmLibcScanfStringReaderTest, SimpleRead) {
TEST(LlvmLibcScanfStringReaderTest, ReadAndReverse) {
const char *str = "abcDEF123";
- LIBC_NAMESPACE::scanf_core::ReadBuffer rb{const_cast<char *>(str), 1000000};
- LIBC_NAMESPACE::scanf_core::Reader reader(&rb);
+ LIBC_NAMESPACE::scanf_core::StringBuffer rb{const_cast<char *>(str), 1000000};
+ LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
+ reader(&rb);
for (size_t i = 0; i < 5; ++i) {
ASSERT_EQ(str[i], reader.getc());
>From bcb6d61a4fb05d66ac9dd4aca2f1ba675f66f61c Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek at google.com>
Date: Wed, 12 Mar 2025 20:46:15 -0700
Subject: [PATCH 2/4] Merge ReadBuffer and Reader
---
libc/src/stdio/scanf_core/converter.cpp | 30 -----------
libc/src/stdio/scanf_core/reader.h | 53 ++++++++-----------
libc/src/stdio/scanf_core/vfscanf_internal.h | 7 ++-
libc/src/stdio/sscanf.cpp | 3 +-
libc/src/stdio/vsscanf.cpp | 4 +-
.../src/stdio/scanf_core/converter_test.cpp | 28 +++-------
.../test/src/stdio/scanf_core/reader_test.cpp | 12 ++---
7 files changed, 37 insertions(+), 100 deletions(-)
delete mode 100644 libc/src/stdio/scanf_core/converter.cpp
diff --git a/libc/src/stdio/scanf_core/converter.cpp b/libc/src/stdio/scanf_core/converter.cpp
deleted file mode 100644
index fb234ce7864cf..0000000000000
--- a/libc/src/stdio/scanf_core/converter.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-//===-- Format specifier converter implmentation for scanf -----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/stdio/scanf_core/converter.h"
-
-#include "src/__support/ctype_utils.h"
-#include "src/__support/macros/config.h"
-#include "src/stdio/scanf_core/core_structs.h"
-#include "src/stdio/scanf_core/reader.h"
-
-#ifndef LIBC_COPT_SCANF_DISABLE_FLOAT
-#include "src/stdio/scanf_core/float_converter.h"
-#endif // LIBC_COPT_SCANF_DISABLE_FLOAT
-#include "src/stdio/scanf_core/current_pos_converter.h"
-#include "src/stdio/scanf_core/int_converter.h"
-#include "src/stdio/scanf_core/ptr_converter.h"
-#include "src/stdio/scanf_core/string_converter.h"
-
-#include <stddef.h>
-
-namespace LIBC_NAMESPACE_DECL {
-namespace scanf_core {
-
-} // namespace scanf_core
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/reader.h b/libc/src/stdio/scanf_core/reader.h
index 21b405491b1fc..95ac35d2c9e79 100644
--- a/libc/src/stdio/scanf_core/reader.h
+++ b/libc/src/stdio/scanf_core/reader.h
@@ -17,18 +17,35 @@
namespace LIBC_NAMESPACE_DECL {
namespace scanf_core {
-template <typename Derived> struct ReadBuffer {
- LIBC_INLINE char getc() { return static_cast<Derived *>(this)->getc(); }
- LIBC_INLINE void ungetc(int c) { static_cast<Derived *>(this)->ungetc(c); }
+template <typename Derived> class Reader {
+ size_t cur_chars_read = 0;
+
+public:
+ // This returns the next character from the input and advances it by one
+ // character. When it hits the end of the string or file it returns '\0' to
+ // signal to stop parsing.
+ LIBC_INLINE char getc() {
+ ++cur_chars_read;
+ return static_cast<Derived *>(this)->getc();
+ }
+
+ // This moves the input back by one character, placing c into the buffer if
+ // this is a file reader, else c is ignored.
+ LIBC_INLINE void ungetc(int c) {
+ --cur_chars_read;
+ static_cast<Derived *>(this)->ungetc(c);
+ }
+
+ LIBC_INLINE size_t chars_read() { return cur_chars_read; }
};
-class StringBuffer : public ReadBuffer<StringBuffer> {
+class StringReader : public Reader<StringReader> {
const char *buffer;
[[maybe_unused]] size_t buff_len;
size_t buff_cur = 0;
public:
- LIBC_INLINE StringBuffer(const char *buffer, size_t buff_len)
+ LIBC_INLINE StringReader(const char *buffer, size_t buff_len)
: buffer(buffer), buff_len(buff_len) {}
LIBC_INLINE char getc() {
@@ -47,32 +64,6 @@ class StringBuffer : public ReadBuffer<StringBuffer> {
}
};
-// TODO: We should be able to fold ReadBuffer into Reader.
-template <typename T> class Reader {
- ReadBuffer<T> *buffer;
- size_t cur_chars_read = 0;
-
-public:
- LIBC_INLINE Reader(ReadBuffer<T> *buffer) : buffer(buffer) {}
-
- // This returns the next character from the input and advances it by one
- // character. When it hits the end of the string or file it returns '\0' to
- // signal to stop parsing.
- LIBC_INLINE char getc() {
- ++cur_chars_read;
- return buffer->getc();
- }
-
- // This moves the input back by one character, placing c into the buffer if
- // this is a file reader, else c is ignored.
- LIBC_INLINE void ungetc(char c) {
- --cur_chars_read;
- buffer->ungetc(c);
- }
-
- LIBC_INLINE size_t chars_read() { return cur_chars_read; }
-};
-
} // namespace scanf_core
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/vfscanf_internal.h b/libc/src/stdio/scanf_core/vfscanf_internal.h
index 46f2675504c6b..1ff722b3bc8ac 100644
--- a/libc/src/stdio/scanf_core/vfscanf_internal.h
+++ b/libc/src/stdio/scanf_core/vfscanf_internal.h
@@ -91,11 +91,11 @@ LIBC_INLINE void ungetc(int c, ::FILE *f) { ::ungetc(c, f); }
namespace scanf_core {
-class StreamBuffer : public ReadBuffer<StreamBuffer> {
+class StreamReader : public Reader<StreamReader> {
::FILE *stream;
public:
- LIBC_INLINE StreamBuffer(::FILE *stream) : stream(stream) {}
+ LIBC_INLINE StreamReader(::FILE *stream) : stream(stream) {}
LIBC_INLINE char getc() {
return static_cast<char>(internal::getc(static_cast<FILE *>(stream)));
@@ -109,8 +109,7 @@ LIBC_INLINE int vfscanf_internal(::FILE *__restrict stream,
const char *__restrict format,
internal::ArgList &args) {
internal::flockfile(stream);
- scanf_core::StreamBuffer buffer(stream);
- scanf_core::Reader<scanf_core::StreamBuffer> reader(&buffer);
+ scanf_core::StreamReader reader(stream);
int retval = scanf_core::scanf_main(&reader, format, args);
if (retval == 0 && internal::ferror_unlocked(stream))
retval = EOF;
diff --git a/libc/src/stdio/sscanf.cpp b/libc/src/stdio/sscanf.cpp
index ec4e10caf6cb5..bed66cc430059 100644
--- a/libc/src/stdio/sscanf.cpp
+++ b/libc/src/stdio/sscanf.cpp
@@ -29,8 +29,7 @@ LLVM_LIBC_FUNCTION(int, sscanf,
// and pointer semantics, as well as handling
// destruction automatically.
va_end(vlist);
- scanf_core::StringBuffer rb(buffer, cpp::numeric_limits<size_t>::max());
- scanf_core::Reader<scanf_core::StringBuffer> reader(&rb);
+ scanf_core::StringReader reader(buffer, cpp::numeric_limits<size_t>::max());
int ret_val = scanf_core::scanf_main(&reader, format, args);
// This is done to avoid including stdio.h in the internals. On most systems
// EOF is -1, so this will be transformed into just "return ret_val".
diff --git a/libc/src/stdio/vsscanf.cpp b/libc/src/stdio/vsscanf.cpp
index e3e2fe34c32ec..b9fe8863990e2 100644
--- a/libc/src/stdio/vsscanf.cpp
+++ b/libc/src/stdio/vsscanf.cpp
@@ -21,9 +21,7 @@ namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(int, vsscanf,
(const char *buffer, const char *format, va_list vlist)) {
internal::ArgList args(vlist);
- scanf_core::StringBuffer rb{const_cast<char *>(buffer),
- cpp::numeric_limits<size_t>::max()};
- scanf_core::Reader<scanf_core::StringBuffer> reader(&rb);
+ scanf_core::StringReader reader(buffer, cpp::numeric_limits<size_t>::max());
int ret_val = scanf_core::scanf_main(&reader, format, args);
// This is done to avoid including stdio.h in the internals. On most systems
// EOF is -1, so this will be transformed into just "return ret_val".
diff --git a/libc/test/src/stdio/scanf_core/converter_test.cpp b/libc/test/src/stdio/scanf_core/converter_test.cpp
index 962b33ff748b3..19f9a376bb209 100644
--- a/libc/test/src/stdio/scanf_core/converter_test.cpp
+++ b/libc/test/src/stdio/scanf_core/converter_test.cpp
@@ -15,9 +15,7 @@
TEST(LlvmLibcScanfConverterTest, RawMatchBasic) {
const char *str = "abcdef";
- LIBC_NAMESPACE::scanf_core::StringBuffer str_reader{str, sizeof(str)};
- LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
- reader(&str_reader);
+ LIBC_NAMESPACE::scanf_core::StringReader reader(str, sizeof(str));
// Reading "abc" should succeed.
ASSERT_EQ(LIBC_NAMESPACE::scanf_core::raw_match(&reader, "abc"),
@@ -52,9 +50,7 @@ TEST(LlvmLibcScanfConverterTest, RawMatchBasic) {
TEST(LlvmLibcScanfConverterTest, RawMatchSpaces) {
const char *str = " a \t\n b cd";
- LIBC_NAMESPACE::scanf_core::StringBuffer str_reader{str, sizeof(str)};
- LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
- reader(&str_reader);
+ LIBC_NAMESPACE::scanf_core::StringReader reader(str, sizeof(str));
// Reading "a" should fail and not advance.
// Since there's nothing in the format string (the second argument to
@@ -100,9 +96,7 @@ TEST(LlvmLibcScanfConverterTest, RawMatchSpaces) {
TEST(LlvmLibcScanfConverterTest, StringConvSimple) {
const char *str = "abcDEF123 654LKJihg";
char result[20];
- LIBC_NAMESPACE::scanf_core::StringBuffer str_reader{str, sizeof(str)};
- LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
- reader(&str_reader);
+ LIBC_NAMESPACE::scanf_core::StringReader reader(str, sizeof(str));
LIBC_NAMESPACE::scanf_core::FormatSection conv;
conv.has_conv = true;
@@ -123,9 +117,7 @@ TEST(LlvmLibcScanfConverterTest, StringConvSimple) {
TEST(LlvmLibcScanfConverterTest, StringConvNoWrite) {
const char *str = "abcDEF123 654LKJihg";
- LIBC_NAMESPACE::scanf_core::StringBuffer str_reader{str, sizeof(str)};
- LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
- reader(&str_reader);
+ LIBC_NAMESPACE::scanf_core::StringReader reader(str, sizeof(str));
LIBC_NAMESPACE::scanf_core::FormatSection conv;
conv.has_conv = true;
@@ -145,9 +137,7 @@ TEST(LlvmLibcScanfConverterTest, StringConvNoWrite) {
TEST(LlvmLibcScanfConverterTest, StringConvWidth) {
const char *str = "abcDEF123 654LKJihg";
char result[6];
- LIBC_NAMESPACE::scanf_core::StringBuffer str_reader{str, sizeof(str)};
- LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
- reader(&str_reader);
+ LIBC_NAMESPACE::scanf_core::StringReader reader(str, sizeof(str));
LIBC_NAMESPACE::scanf_core::FormatSection conv;
conv.has_conv = true;
@@ -180,9 +170,7 @@ TEST(LlvmLibcScanfConverterTest, StringConvWidth) {
TEST(LlvmLibcScanfConverterTest, CharsConv) {
const char *str = "abcDEF123 654LKJihg MNOpqr&*(";
char result[20];
- LIBC_NAMESPACE::scanf_core::StringBuffer str_reader{str, sizeof(str)};
- LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
- reader(&str_reader);
+ LIBC_NAMESPACE::scanf_core::StringReader reader(str, sizeof(str));
LIBC_NAMESPACE::scanf_core::FormatSection conv;
conv.has_conv = true;
@@ -236,9 +224,7 @@ TEST(LlvmLibcScanfConverterTest, CharsConv) {
TEST(LlvmLibcScanfConverterTest, ScansetConv) {
const char *str = "abcDEF[123] 654LKJihg";
char result[20];
- LIBC_NAMESPACE::scanf_core::StringBuffer str_reader{str, sizeof(str)};
- LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
- reader(&str_reader);
+ LIBC_NAMESPACE::scanf_core::StringReader reader(str, sizeof(str));
LIBC_NAMESPACE::scanf_core::FormatSection conv;
conv.has_conv = true;
diff --git a/libc/test/src/stdio/scanf_core/reader_test.cpp b/libc/test/src/stdio/scanf_core/reader_test.cpp
index 787185d8cb9f9..fa3260d556086 100644
--- a/libc/test/src/stdio/scanf_core/reader_test.cpp
+++ b/libc/test/src/stdio/scanf_core/reader_test.cpp
@@ -15,16 +15,12 @@ TEST(LlvmLibcScanfStringReaderTest, Constructor) {
char str[10];
// buff_len justneeds to be a big number. The specific value isn't important
// in the real world.
- LIBC_NAMESPACE::scanf_core::StringBuffer rb{const_cast<char *>(str), 1000000};
- LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
- reader(&rb);
+ LIBC_NAMESPACE::scanf_core::StringReader reader(const_cast<char *>(str), 1000000);
}
TEST(LlvmLibcScanfStringReaderTest, SimpleRead) {
const char *str = "abc";
- LIBC_NAMESPACE::scanf_core::StringBuffer rb{const_cast<char *>(str), 1000000};
- LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
- reader(&rb);
+ LIBC_NAMESPACE::scanf_core::StringReader reader(const_cast<char *>(str), 1000000);
for (size_t i = 0; i < sizeof("abc"); ++i) {
ASSERT_EQ(str[i], reader.getc());
@@ -33,9 +29,7 @@ TEST(LlvmLibcScanfStringReaderTest, SimpleRead) {
TEST(LlvmLibcScanfStringReaderTest, ReadAndReverse) {
const char *str = "abcDEF123";
- LIBC_NAMESPACE::scanf_core::StringBuffer rb{const_cast<char *>(str), 1000000};
- LIBC_NAMESPACE::scanf_core::Reader<LIBC_NAMESPACE::scanf_core::StringBuffer>
- reader(&rb);
+ LIBC_NAMESPACE::scanf_core::StringReader reader(const_cast<char *>(str), 1000000);
for (size_t i = 0; i < 5; ++i) {
ASSERT_EQ(str[i], reader.getc());
>From 7fb8a00206696e523954b66eeb53877f0b8990f1 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek at google.com>
Date: Wed, 12 Mar 2025 21:07:20 -0700
Subject: [PATCH 3/4] Fix formatting
---
libc/test/src/stdio/scanf_core/reader_test.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/libc/test/src/stdio/scanf_core/reader_test.cpp b/libc/test/src/stdio/scanf_core/reader_test.cpp
index fa3260d556086..2ba95d84f5f50 100644
--- a/libc/test/src/stdio/scanf_core/reader_test.cpp
+++ b/libc/test/src/stdio/scanf_core/reader_test.cpp
@@ -15,12 +15,14 @@ TEST(LlvmLibcScanfStringReaderTest, Constructor) {
char str[10];
// buff_len justneeds to be a big number. The specific value isn't important
// in the real world.
- LIBC_NAMESPACE::scanf_core::StringReader reader(const_cast<char *>(str), 1000000);
+ LIBC_NAMESPACE::scanf_core::StringReader reader(const_cast<char *>(str),
+ 1000000);
}
TEST(LlvmLibcScanfStringReaderTest, SimpleRead) {
const char *str = "abc";
- LIBC_NAMESPACE::scanf_core::StringReader reader(const_cast<char *>(str), 1000000);
+ LIBC_NAMESPACE::scanf_core::StringReader reader(const_cast<char *>(str),
+ 1000000);
for (size_t i = 0; i < sizeof("abc"); ++i) {
ASSERT_EQ(str[i], reader.getc());
@@ -29,7 +31,8 @@ TEST(LlvmLibcScanfStringReaderTest, SimpleRead) {
TEST(LlvmLibcScanfStringReaderTest, ReadAndReverse) {
const char *str = "abcDEF123";
- LIBC_NAMESPACE::scanf_core::StringReader reader(const_cast<char *>(str), 1000000);
+ LIBC_NAMESPACE::scanf_core::StringReader reader(const_cast<char *>(str),
+ 1000000);
for (size_t i = 0; i < 5; ++i) {
ASSERT_EQ(str[i], reader.getc());
>From 848c876e0fcfa701ff68c954024354740cf6a7db Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek at google.com>
Date: Mon, 17 Mar 2025 16:01:26 -0700
Subject: [PATCH 4/4] Split StringReader into its own header
---
libc/src/stdio/CMakeLists.txt | 4 +-
libc/src/stdio/scanf_core/CMakeLists.txt | 11 ++++-
libc/src/stdio/scanf_core/reader.h | 25 ----------
libc/src/stdio/scanf_core/string_reader.h | 49 +++++++++++++++++++
libc/src/stdio/sscanf.cpp | 2 +-
libc/src/stdio/vsscanf.cpp | 2 +-
libc/test/src/stdio/scanf_core/CMakeLists.txt | 4 +-
.../src/stdio/scanf_core/converter_test.cpp | 2 +-
.../test/src/stdio/scanf_core/reader_test.cpp | 2 +-
9 files changed, 66 insertions(+), 35 deletions(-)
create mode 100644 libc/src/stdio/scanf_core/string_reader.h
diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt
index b9bc904471df9..ce5d0660ff81f 100644
--- a/libc/src/stdio/CMakeLists.txt
+++ b/libc/src/stdio/CMakeLists.txt
@@ -117,8 +117,8 @@ add_entrypoint_object(
sscanf.h
DEPENDS
libc.src.__support.arg_list
- libc.src.stdio.scanf_core.reader
libc.src.stdio.scanf_core.scanf_main
+ libc.src.stdio.scanf_core.string_reader
)
add_entrypoint_object(
@@ -129,8 +129,8 @@ add_entrypoint_object(
vsscanf.h
DEPENDS
libc.src.__support.arg_list
- libc.src.stdio.scanf_core.reader
libc.src.stdio.scanf_core.scanf_main
+ libc.src.stdio.scanf_core.string_reader
)
add_entrypoint_object(
diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt
index 84d12e6138002..d0d922ac823fe 100644
--- a/libc/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/src/stdio/scanf_core/CMakeLists.txt
@@ -81,8 +81,15 @@ add_header_library(
reader.h
DEPENDS
libc.src.__support.macros.attributes
- ${file_deps}
- ${use_system_file}
+)
+
+add_header_library(
+ string_reader
+ HDRS
+ string_reader.h
+ DEPENDS
+ .reader
+ libc.src.__support.macros.attributes
)
add_header_library(
diff --git a/libc/src/stdio/scanf_core/reader.h b/libc/src/stdio/scanf_core/reader.h
index 95ac35d2c9e79..c71446ea0abed 100644
--- a/libc/src/stdio/scanf_core/reader.h
+++ b/libc/src/stdio/scanf_core/reader.h
@@ -39,31 +39,6 @@ template <typename Derived> class Reader {
LIBC_INLINE size_t chars_read() { return cur_chars_read; }
};
-class StringReader : public Reader<StringReader> {
- const char *buffer;
- [[maybe_unused]] size_t buff_len;
- size_t buff_cur = 0;
-
-public:
- LIBC_INLINE StringReader(const char *buffer, size_t buff_len)
- : buffer(buffer), buff_len(buff_len) {}
-
- LIBC_INLINE char getc() {
- char output = buffer[buff_cur];
- ++buff_cur;
- return output;
- }
- LIBC_INLINE void ungetc(int) {
- if (buff_cur > 0) {
- // While technically c should be written back to the buffer, in scanf we
- // always write the character that was already there. Additionally, the
- // buffer is most likely to contain a string that isn't part of a file,
- // which may not be writable.
- --buff_cur;
- }
- }
-};
-
} // namespace scanf_core
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/scanf_core/string_reader.h b/libc/src/stdio/scanf_core/string_reader.h
new file mode 100644
index 0000000000000..95ca22d956b7d
--- /dev/null
+++ b/libc/src/stdio/scanf_core/string_reader.h
@@ -0,0 +1,49 @@
+//===-- String Reader definition for scanf ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_READER_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_READER_H
+
+#include "src/__support/macros/attributes.h" // For LIBC_INLINE
+#include "src/__support/macros/config.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace LIBC_NAMESPACE_DECL {
+namespace scanf_core {
+
+class StringReader : public Reader<StringReader> {
+ const char *buffer;
+ [[maybe_unused]] size_t buff_len;
+ size_t buff_cur = 0;
+
+public:
+ LIBC_INLINE StringReader(const char *buffer, size_t buff_len)
+ : buffer(buffer), buff_len(buff_len) {}
+
+ LIBC_INLINE char getc() {
+ char output = buffer[buff_cur];
+ ++buff_cur;
+ return output;
+ }
+ LIBC_INLINE void ungetc(int) {
+ if (buff_cur > 0) {
+ // While technically c should be written back to the buffer, in scanf we
+ // always write the character that was already there. Additionally, the
+ // buffer is most likely to contain a string that isn't part of a file,
+ // which may not be writable.
+ --buff_cur;
+ }
+ }
+};
+
+} // namespace scanf_core
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_READER_H
diff --git a/libc/src/stdio/sscanf.cpp b/libc/src/stdio/sscanf.cpp
index bed66cc430059..9fa2ede461595 100644
--- a/libc/src/stdio/sscanf.cpp
+++ b/libc/src/stdio/sscanf.cpp
@@ -11,8 +11,8 @@
#include "src/__support/CPP/limits.h"
#include "src/__support/arg_list.h"
#include "src/__support/macros/config.h"
-#include "src/stdio/scanf_core/reader.h"
#include "src/stdio/scanf_core/scanf_main.h"
+#include "src/stdio/scanf_core/string_reader.h"
#include "hdr/stdio_macros.h"
#include "hdr/types/FILE.h"
diff --git a/libc/src/stdio/vsscanf.cpp b/libc/src/stdio/vsscanf.cpp
index b9fe8863990e2..7c7240a102b5a 100644
--- a/libc/src/stdio/vsscanf.cpp
+++ b/libc/src/stdio/vsscanf.cpp
@@ -11,8 +11,8 @@
#include "hdr/stdio_macros.h"
#include "src/__support/CPP/limits.h"
#include "src/__support/arg_list.h"
-#include "src/stdio/scanf_core/reader.h"
#include "src/stdio/scanf_core/scanf_main.h"
+#include "src/stdio/scanf_core/string_reader.h"
#include <stdarg.h>
diff --git a/libc/test/src/stdio/scanf_core/CMakeLists.txt b/libc/test/src/stdio/scanf_core/CMakeLists.txt
index 058f665e42930..64ff7d324c6fd 100644
--- a/libc/test/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/test/src/stdio/scanf_core/CMakeLists.txt
@@ -32,7 +32,7 @@ add_libc_unittest(
SRCS
reader_test.cpp
DEPENDS
- libc.src.stdio.scanf_core.reader
+ libc.src.stdio.scanf_core.string_reader
libc.src.__support.CPP.string_view
COMPILE_OPTIONS
${use_system_file}
@@ -45,8 +45,8 @@ add_libc_unittest(
SRCS
converter_test.cpp
DEPENDS
- libc.src.stdio.scanf_core.reader
libc.src.stdio.scanf_core.converter
+ libc.src.stdio.scanf_core.string_reader
libc.src.__support.CPP.string_view
COMPILE_OPTIONS
${use_system_file}
diff --git a/libc/test/src/stdio/scanf_core/converter_test.cpp b/libc/test/src/stdio/scanf_core/converter_test.cpp
index 19f9a376bb209..ff0ce9200e51e 100644
--- a/libc/test/src/stdio/scanf_core/converter_test.cpp
+++ b/libc/test/src/stdio/scanf_core/converter_test.cpp
@@ -9,7 +9,7 @@
#include "src/__support/CPP/string_view.h"
#include "src/stdio/scanf_core/converter.h"
#include "src/stdio/scanf_core/core_structs.h"
-#include "src/stdio/scanf_core/reader.h"
+#include "src/stdio/scanf_core/string_reader.h"
#include "test/UnitTest/Test.h"
diff --git a/libc/test/src/stdio/scanf_core/reader_test.cpp b/libc/test/src/stdio/scanf_core/reader_test.cpp
index 2ba95d84f5f50..4cafc81251f0b 100644
--- a/libc/test/src/stdio/scanf_core/reader_test.cpp
+++ b/libc/test/src/stdio/scanf_core/reader_test.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "src/__support/CPP/string_view.h"
-#include "src/stdio/scanf_core/reader.h"
+#include "src/stdio/scanf_core/string_reader.h"
#include "test/UnitTest/Test.h"
More information about the libc-commits
mailing list