[libc-commits] [libc] 1c40d5e - [libc] add scanf string converters
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Mon Nov 7 13:49:08 PST 2022
Author: Michael Jones
Date: 2022-11-07T13:49:01-08:00
New Revision: 1c40d5ec7dc5b782262c79b0f7a57bfea6fbe75c
URL: https://github.com/llvm/llvm-project/commit/1c40d5ec7dc5b782262c79b0f7a57bfea6fbe75c
DIFF: https://github.com/llvm/llvm-project/commit/1c40d5ec7dc5b782262c79b0f7a57bfea6fbe75c.diff
LOG: [libc] add scanf string converters
This patch adds the basic conversion facilities to scanf as well as unit
tests for them. It also adds scanf_main which will be used for the
eventual scanf entrypoints.
Reviewed By: sivachandra
Differential Revision: https://reviews.llvm.org/D137376
Added:
libc/src/stdio/scanf_core/converter.cpp
libc/src/stdio/scanf_core/converter.h
libc/src/stdio/scanf_core/scanf_main.cpp
libc/src/stdio/scanf_core/scanf_main.h
libc/src/stdio/scanf_core/string_converter.cpp
libc/src/stdio/scanf_core/string_converter.h
libc/test/src/stdio/scanf_core/converter_test.cpp
Modified:
libc/src/stdio/scanf_core/CMakeLists.txt
libc/src/stdio/scanf_core/core_structs.h
libc/src/stdio/scanf_core/reader.cpp
libc/src/stdio/scanf_core/reader.h
libc/test/src/stdio/scanf_core/CMakeLists.txt
Removed:
################################################################################
diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt
index 91cf5e2ada90..940e9f0d083f 100644
--- a/libc/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/src/stdio/scanf_core/CMakeLists.txt
@@ -30,6 +30,20 @@ if(NOT (TARGET libc.src.__support.File.file))
return()
endif()
+# Core scanf loop (scanf_main.cpp). It depends on the parser, reader, and
+# converter targets, which it drives over a format string.
+add_object_library(
+ scanf_main
+ SRCS
+ scanf_main.cpp
+ HDRS
+ scanf_main.h
+ DEPENDS
+ .parser
+ .reader
+ .converter
+ .core_structs
+ libc.src.__support.arg_list
+)
+
add_object_library(
string_reader
SRCS
@@ -58,3 +72,20 @@ add_object_library(
.string_reader
.file_reader
)
+
+# Conversion routines for scanf: the convert/raw_match dispatcher in
+# converter.cpp plus the %s, %c, and %[ handling in string_converter.cpp.
+add_object_library(
+ converter
+ SRCS
+ converter.cpp
+ string_converter.cpp
+ HDRS
+ converter.h
+ string_converter.h
+ DEPENDS
+ .reader
+ .core_structs
+ libc.src.__support.ctype_utils
+ libc.src.__support.CPP.bitset
+ libc.src.__support.CPP.string_view
+ libc.src.__support.CPP.limits
+)
diff --git a/libc/src/stdio/scanf_core/converter.cpp b/libc/src/stdio/scanf_core/converter.cpp
new file mode 100644
index 000000000000..3cfa8758349e
--- /dev/null
+++ b/libc/src/stdio/scanf_core/converter.cpp
@@ -0,0 +1,98 @@
+//===-- Format specifier converter implementation for scanf -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/scanf_core/converter.h"
+
+#include "src/__support/ctype_utils.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include "src/stdio/scanf_core/string_converter.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+// Dispatches a single parsed conversion to the routine that implements it and
+// returns that routine's result: READ_OK on success, otherwise an error code
+// such as MATCHING_FAILURE.
+int convert(Reader *reader, const FormatSection &to_conv) {
+  // "%%" in the format string matches one literal '%' in the input.
+  if (to_conv.conv_name == '%')
+    return raw_match(reader, "%");
+
+  // %c and %[ take the input as-is, leading whitespace included.
+  if (to_conv.conv_name == 'c' || to_conv.conv_name == '[')
+    return convert_string(reader, to_conv);
+
+  // %s first discards leading whitespace, then reads the string itself.
+  if (to_conv.conv_name == 's') {
+    const int skip_result = raw_match(reader, " ");
+    if (skip_result != READ_OK)
+      return skip_result;
+    return convert_string(reader, to_conv);
+  }
+
+  // TODO: the remaining conversions are not implemented yet. The planned
+  // handlers, each preceded by the same whitespace skip as %s unless noted:
+  //   d, i, u, o, x, X        -> convert_int
+  //   f, F, e, E, a, A, g, G  -> convert_float
+  //                              (unless LLVM_LIBC_SCANF_DISABLE_FLOAT)
+  //   n (no whitespace skip)  -> convert_write_int
+  //                              (unless LLVM_LIBC_SCANF_DISABLE_WRITE_INT)
+  //   p                       -> convert_pointer
+  // Until then, every other conversion name is matched literally against the
+  // raw text of its format section.
+  return raw_match(reader, to_conv.raw_string);
+}
+
+// Matches the literal text of raw_string against the reader's input. A
+// whitespace character in raw_string consumes any run of whitespace in the
+// input (possibly none); any other character must equal the next input
+// character. One character of lookahead is always read and then handed back
+// with ungetc, so an empty raw_string succeeds without advancing the reader.
+// Returns READ_OK on a full match, or MATCHING_FAILURE at the first mismatch.
+int raw_match(Reader *reader, cpp::string_view raw_string) {
+  char lookahead = reader->getc();
+  int status = READ_OK;
+  for (size_t pos = 0; pos < raw_string.size() && status == READ_OK; ++pos) {
+    const char expected = raw_string[pos];
+    if (internal::isspace(expected)) {
+      // Any space character matches any number of space characters.
+      while (internal::isspace(lookahead))
+        lookahead = reader->getc();
+      continue;
+    }
+    if (expected != lookahead)
+      status = MATCHING_FAILURE;
+    else
+      lookahead = reader->getc();
+  }
+  // The lookahead character was never consumed by the match, so return it.
+  reader->ungetc(lookahead);
+  return status;
+}
+
+} // namespace scanf_core
+} // namespace __llvm_libc
diff --git a/libc/src/stdio/scanf_core/converter.h b/libc/src/stdio/scanf_core/converter.h
new file mode 100644
index 000000000000..cd91ff66a3ae
--- /dev/null
+++ b/libc/src/stdio/scanf_core/converter.h
@@ -0,0 +1,33 @@
+//===-- Format specifier converter for scanf -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H
+
+#include "src/__support/CPP/string_view.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+// convert dispatches on to_conv.conv_name: "%%" is matched as a literal '%',
+// %s skips leading whitespace and then reads a string, %c and %[ read a string
+// without skipping whitespace, and any other conversion is matched against its
+// raw text. It returns READ_OK on success or an error code on failure.
+int convert(Reader *reader, const FormatSection &to_conv);
+
+// raw_match compares raw_string against the characters obtained from the
+// reader. A whitespace character in raw_string matches any number of
+// whitespace characters in the input, including none. It returns READ_OK on a
+// full match and MATCHING_FAILURE otherwise.
+int raw_match(Reader *reader, cpp::string_view raw_string);
+
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_H
diff --git a/libc/src/stdio/scanf_core/core_structs.h b/libc/src/stdio/scanf_core/core_structs.h
index 213a5e1a2b59..7f331db36202 100644
--- a/libc/src/stdio/scanf_core/core_structs.h
+++ b/libc/src/stdio/scanf_core/core_structs.h
@@ -78,7 +78,7 @@ struct FormatSection {
enum ErrorCodes : int {
// This is the value to be returned by conversions when no error has occurred.
- WRITE_OK = 0,
+ READ_OK = 0,
// These are the scanf return values for when an error has occurred. They are
// all negative, and should be distinct.
FILE_READ_ERROR = -1,
diff --git a/libc/src/stdio/scanf_core/reader.cpp b/libc/src/stdio/scanf_core/reader.cpp
index 23dcbd405505..0d8d5a30f7c4 100644
--- a/libc/src/stdio/scanf_core/reader.cpp
+++ b/libc/src/stdio/scanf_core/reader.cpp
@@ -13,6 +13,7 @@ namespace __llvm_libc {
namespace scanf_core {
char Reader::getc() {
+ ++cur_chars_read;
if (reader_type == ReaderType::String) {
return string_reader->get_char();
} else {
@@ -21,6 +22,7 @@ char Reader::getc() {
}
void Reader::ungetc(char c) {
+ --cur_chars_read;
if (reader_type == ReaderType::String) {
// The string reader ignores the char c passed to unget since it doesn't
// need to place anything back into a buffer, and modifying the source
diff --git a/libc/src/stdio/scanf_core/reader.h b/libc/src/stdio/scanf_core/reader.h
index 4d6ed06c00e7..4ca25cc0d0ca 100644
--- a/libc/src/stdio/scanf_core/reader.h
+++ b/libc/src/stdio/scanf_core/reader.h
@@ -26,6 +26,8 @@ class Reader final {
const ReaderType reader_type;
+ size_t cur_chars_read = 0;
+
public:
Reader(StringReader *init_string_reader)
: string_reader(init_string_reader), reader_type(ReaderType::String) {}
@@ -41,6 +43,8 @@ class Reader final {
// This moves the input back by one character, placing c into the buffer if
// this is a file reader, else c is ignored.
void ungetc(char c);
+
+ size_t chars_read() { return cur_chars_read; }
};
} // namespace scanf_core
diff --git a/libc/src/stdio/scanf_core/scanf_main.cpp b/libc/src/stdio/scanf_core/scanf_main.cpp
new file mode 100644
index 000000000000..fcf7af2083f2
--- /dev/null
+++ b/libc/src/stdio/scanf_core/scanf_main.cpp
@@ -0,0 +1,47 @@
+//===-- Starting point for scanf --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/scanf_core/scanf_main.h"
+
+#include "src/__support/arg_list.h"
+#include "src/stdio/scanf_core/converter.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/parser.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+// Walks the format string str one section at a time, feeding each section to
+// either convert (for conversions) or raw_match (for literal text) until the
+// format is exhausted or a section fails. Returns the number of successful
+// conversions, or -1 if a failure occurred before any conversion succeeded.
+int scanf_main(Reader *reader, const char *__restrict str,
+               internal::ArgList &args) {
+  Parser parser(str, args);
+  int status = READ_OK;
+  int conversions = 0;
+
+  // An empty raw_string marks the end of the format string.
+  FormatSection section = parser.get_next_section();
+  while (!section.raw_string.empty() && status == READ_OK) {
+    if (!section.has_conv) {
+      status = raw_match(reader, section.raw_string);
+    } else {
+      status = convert(reader, section);
+      if (status == READ_OK)
+        ++conversions;
+    }
+    section = parser.get_next_section();
+  }
+
+  // The -1 is intended to be converted to EOF in the client call to avoid
+  // including stdio.h in this internal file.
+  const bool failed_before_any_conversion =
+      conversions == 0 && status != READ_OK;
+  return failed_before_any_conversion ? -1 : conversions;
+}
+
+} // namespace scanf_core
+} // namespace __llvm_libc
diff --git a/libc/src/stdio/scanf_core/scanf_main.h b/libc/src/stdio/scanf_core/scanf_main.h
new file mode 100644
index 000000000000..d1db46b7c77d
--- /dev/null
+++ b/libc/src/stdio/scanf_core/scanf_main.h
@@ -0,0 +1,26 @@
+//===-- Starting point for scanf --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_MAIN_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_MAIN_H
+
+#include "src/__support/arg_list.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+// scanf_main processes the format string str section by section, reading input
+// from reader. It returns the number of successful conversions, or -1 when a
+// failure occurs before any conversion has succeeded.
+int scanf_main(Reader *reader, const char *__restrict str,
+ internal::ArgList &args);
+
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_MAIN_H
diff --git a/libc/src/stdio/scanf_core/string_converter.cpp b/libc/src/stdio/scanf_core/string_converter.cpp
new file mode 100644
index 000000000000..bdbb5c87f75e
--- /dev/null
+++ b/libc/src/stdio/scanf_core/string_converter.cpp
@@ -0,0 +1,76 @@
+//===-- String type specifier converters for scanf --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/scanf_core/string_converter.h"
+
+#include "src/__support/CPP/limits.h"
+#include "src/__support/ctype_utils.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+// Reads characters from the reader for the %s, %c, and %[ conversions and,
+// unless NO_WRITE is set in to_conv.flags, stores them through
+// to_conv.output_ptr. Reading stops at the field width, at a '\0' from the
+// reader, at whitespace for %s, or at a character outside the scanset for %[.
+// %s and %[ results are null terminated; %c results are not.
+// Returns MATCHING_FAILURE if no characters were read, READ_OK otherwise.
+int convert_string(Reader *reader, const FormatSection &to_conv) {
+  // %s "Matches a sequence of non-white-space characters"
+
+  // %c "Matches a sequence of characters of exactly the number specified by the
+  // field width (1 if no field width is present in the directive)"
+
+  // %[ "Matches a nonempty sequence of characters from a set of expected
+  // characters (the scanset)."
+  size_t max_width = 0;
+  if (to_conv.max_width > 0) {
+    max_width = to_conv.max_width;
+  } else {
+    if (to_conv.conv_name == 'c') {
+      max_width = 1;
+    } else {
+      max_width = cpp::numeric_limits<size_t>::max();
+    }
+  }
+
+  char *output = reinterpret_cast<char *>(to_conv.output_ptr);
+
+  char cur_char = reader->getc();
+  size_t i = 0;
+  for (; i < max_width && cur_char != '\0'; ++i) {
+    // The scanset is indexed by character value. Plain char may be signed, so
+    // convert through unsigned char first; otherwise a byte >= 0x80 would
+    // become a huge index and read outside the scanset.
+    const unsigned char scan_index = static_cast<unsigned char>(cur_char);
+
+    // If this is %s and we've hit a space, or if this is %[] and we've found
+    // something not in the scanset.
+    if ((to_conv.conv_name == 's' && internal::isspace(cur_char)) ||
+        (to_conv.conv_name == '[' && !to_conv.scan_set.test(scan_index))) {
+      break;
+    }
+    // if the NO_WRITE flag is not set, write to the output.
+    if ((to_conv.flags & NO_WRITE) == 0)
+      output[i] = cur_char;
+    cur_char = reader->getc();
+  }
+
+  // We always read one more character than will be used, so we have to put the
+  // last one back.
+  reader->ungetc(cur_char);
+
+  // If this is %s or %[]
+  if (to_conv.conv_name != 'c' && (to_conv.flags & NO_WRITE) == 0) {
+    // Always null terminate the string. This may cause a write to the
+    // (max_width + 1) byte, which is correct. The max width describes the max
+    // number of characters read from the input string, and doesn't necessarily
+    // correspond to the output.
+    output[i] = '\0';
+  }
+
+  if (i == 0)
+    return MATCHING_FAILURE;
+  return READ_OK;
+}
+
+} // namespace scanf_core
+} // namespace __llvm_libc
diff --git a/libc/src/stdio/scanf_core/string_converter.h b/libc/src/stdio/scanf_core/string_converter.h
new file mode 100644
index 000000000000..4113f5cb9a36
--- /dev/null
+++ b/libc/src/stdio/scanf_core/string_converter.h
@@ -0,0 +1,25 @@
+//===-- String type specifier converters for scanf --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H
+
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+// convert_string handles the %s, %c, and %[ conversions: it reads characters
+// from the reader and, unless the NO_WRITE flag is set, stores them through
+// to_conv.output_ptr. It returns MATCHING_FAILURE if no characters were read
+// and READ_OK otherwise.
+int convert_string(Reader *reader, const FormatSection &to_conv);
+
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_CONVERTER_H
diff --git a/libc/test/src/stdio/scanf_core/CMakeLists.txt b/libc/test/src/stdio/scanf_core/CMakeLists.txt
index fa4878ae5b15..db20335a5c94 100644
--- a/libc/test/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/test/src/stdio/scanf_core/CMakeLists.txt
@@ -30,3 +30,16 @@ add_libc_unittest(
libc.src.stdio.scanf_core.string_reader
libc.src.__support.CPP.string_view
)
+
+# Unit tests for raw_match and the string conversions, run against a
+# string-backed reader.
+add_libc_unittest(
+ converter_test
+ SUITE
+ libc_stdio_unittests
+ SRCS
+ converter_test.cpp
+ DEPENDS
+ libc.src.stdio.scanf_core.reader
+ libc.src.stdio.scanf_core.string_reader
+ libc.src.stdio.scanf_core.converter
+ libc.src.__support.CPP.string_view
+)
diff --git a/libc/test/src/stdio/scanf_core/converter_test.cpp b/libc/test/src/stdio/scanf_core/converter_test.cpp
new file mode 100644
index 000000000000..d90af34ff197
--- /dev/null
+++ b/libc/test/src/stdio/scanf_core/converter_test.cpp
@@ -0,0 +1,295 @@
+//===-- Unittests for the basic scanf converters --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/CPP/string_view.h"
+#include "src/stdio/scanf_core/converter.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+#include "src/stdio/scanf_core/string_reader.h"
+
+#include "utils/UnitTest/Test.h"
+
+// Checks raw_match on input without whitespace. The reader is shared by all
+// the assertions, so each chars_read() value is cumulative.
+TEST(LlvmLibcScanfConverterTest, RawMatchBasic) {
+ const char *str = "abcdef";
+ __llvm_libc::scanf_core::StringReader str_reader(str);
+ __llvm_libc::scanf_core::Reader reader(&str_reader);
+
+ // Reading "abc" should succeed.
+ ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "abc"),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(3));
+
+ // Reading nothing should succeed and not advance.
+ ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, ""),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(3));
+
+ // Reading a space where there is none should succeed and not advance.
+ ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, " "),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(3));
+
+ // Reading "d" should succeed and advance by 1.
+ ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "d"),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(4));
+
+ // Reading "z" should fail and not advance.
+ ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "z"),
+ static_cast<int>(__llvm_libc::scanf_core::MATCHING_FAILURE));
+ ASSERT_EQ(reader.chars_read(), size_t(4));
+
+ // Reading "efgh" should fail but advance to the end.
+ // The "ef" prefix matches and is consumed before the input runs out.
+ ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "efgh"),
+ static_cast<int>(__llvm_libc::scanf_core::MATCHING_FAILURE));
+ ASSERT_EQ(reader.chars_read(), size_t(6));
+}
+
+// Checks how raw_match treats whitespace in both the format string and the
+// input. The reader is shared by all the assertions, so each chars_read()
+// value is cumulative.
+TEST(LlvmLibcScanfConverterTest, RawMatchSpaces) {
+  // Layout: ' ', 'a', ' ', '\t', '\n', ' ', 'b', three spaces, then "cd", for
+  // 12 characters in total. The three spaces between 'b' and 'c' are required
+  // by the chars_read() values of 10 and 12 asserted below.
+  const char *str = " a \t\n b   cd";
+  __llvm_libc::scanf_core::StringReader str_reader(str);
+  __llvm_libc::scanf_core::Reader reader(&str_reader);
+
+  // Reading "a" should fail and not advance.
+  // Since there's nothing in the format string (the second argument to
+  // raw_match) to match the space in the buffer it isn't consumed.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "a"),
+            static_cast<int>(__llvm_libc::scanf_core::MATCHING_FAILURE));
+  ASSERT_EQ(reader.chars_read(), size_t(0));
+
+  // Reading " \t\n " should succeed and advance past the space.
+  // Any number of space characters in the format string match 0 or more space
+  // characters in the buffer.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, " \t\n "),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(1));
+
+  // Reading "ab" should fail and only advance past the a
+  // The a characters match, but the format string doesn't have anything to
+  // consume the spaces in the buffer, so it fails.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "ab"),
+            static_cast<int>(__llvm_libc::scanf_core::MATCHING_FAILURE));
+  ASSERT_EQ(reader.chars_read(), size_t(2));
+
+  // Reading " b" should succeed and advance past the b
+  // Any number of space characters in the format string matches 0 or more space
+  // characters in the buffer.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, " b"),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(7));
+
+  // Reading "\t" should succeed and advance past the spaces to the c
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "\t"),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(10));
+
+  // Reading "c d" should succeed and advance past the d.
+  // Here the space character in the format string is matching 0 space
+  // characters in the buffer.
+  ASSERT_EQ(__llvm_libc::scanf_core::raw_match(&reader, "c d"),
+            static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+  ASSERT_EQ(reader.chars_read(), size_t(12));
+}
+
+// Checks %s with no field width: each conversion reads one whitespace-delimited
+// word and null terminates it. chars_read() values are cumulative.
+TEST(LlvmLibcScanfConverterTest, StringConvSimple) {
+ const char *str = "abcDEF123 654LKJihg";
+ char result[20];
+ __llvm_libc::scanf_core::StringReader str_reader(str);
+ __llvm_libc::scanf_core::Reader reader(&str_reader);
+
+ __llvm_libc::scanf_core::FormatSection conv;
+ conv.has_conv = true;
+ conv.conv_name = 's';
+ conv.output_ptr = result;
+
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(9));
+ ASSERT_STREQ(result, "abcDEF123");
+
+ // %s skips all spaces before beginning to read.
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(19));
+ ASSERT_STREQ(result, "654LKJihg");
+}
+
+// Checks %s with the NO_WRITE flag: no output pointer is supplied, so only the
+// return value and the reader's cumulative position are verified.
+TEST(LlvmLibcScanfConverterTest, StringConvNoWrite) {
+ const char *str = "abcDEF123 654LKJihg";
+ __llvm_libc::scanf_core::StringReader str_reader(str);
+ __llvm_libc::scanf_core::Reader reader(&str_reader);
+
+ __llvm_libc::scanf_core::FormatSection conv;
+ conv.has_conv = true;
+ conv.conv_name = 's';
+ conv.flags = __llvm_libc::scanf_core::NO_WRITE;
+
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(9));
+
+ // %s skips all spaces before beginning to read.
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(19));
+}
+
+// Checks %s with a field width of 5: a word longer than the width is split
+// across consecutive conversions. chars_read() values are cumulative.
+TEST(LlvmLibcScanfConverterTest, StringConvWidth) {
+ const char *str = "abcDEF123 654LKJihg";
+ char result[6];
+ __llvm_libc::scanf_core::StringReader str_reader(str);
+ __llvm_libc::scanf_core::Reader reader(&str_reader);
+
+ __llvm_libc::scanf_core::FormatSection conv;
+ conv.has_conv = true;
+ conv.conv_name = 's';
+ conv.max_width = 5; // this means the result takes up 6 characters (with \0).
+ conv.output_ptr = result;
+
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(5));
+ ASSERT_STREQ(result, "abcDE");
+
+ // The rest of the first word is shorter than the width, so the conversion
+ // stops at the space instead.
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(9));
+ ASSERT_STREQ(result, "F123");
+
+ // %s skips all spaces before beginning to read.
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(15));
+ ASSERT_STREQ(result, "654LK");
+
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(19));
+ ASSERT_STREQ(result, "Jihg");
+}
+
+// Checks %c, first with the default width of 1 and then with a width of 8.
+// The output is not null terminated, so results are compared by explicit
+// length. chars_read() values are cumulative.
+TEST(LlvmLibcScanfConverterTest, CharsConv) {
+ const char *str = "abcDEF123 654LKJihg MNOpqr&*(";
+ char result[20];
+ __llvm_libc::scanf_core::StringReader str_reader(str);
+ __llvm_libc::scanf_core::Reader reader(&str_reader);
+
+ __llvm_libc::scanf_core::FormatSection conv;
+ conv.has_conv = true;
+ conv.conv_name = 'c';
+ conv.output_ptr = result;
+
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(1));
+ ASSERT_EQ(result[0], 'a');
+
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(2));
+ ASSERT_EQ(result[0], 'b');
+
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(3));
+ ASSERT_EQ(result[0], 'c');
+
+ // Switch from character by character to 8 at a time.
+ conv.max_width = 8;
+ __llvm_libc::cpp::string_view result_view(result, 8);
+
+ // %c doesn't stop on spaces.
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(11));
+ ASSERT_EQ(result_view, __llvm_libc::cpp::string_view("DEF123 6", 8));
+
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(19));
+ ASSERT_EQ(result_view, __llvm_libc::cpp::string_view("54LKJihg", 8));
+
+ // %c also doesn't skip spaces at the start.
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(27));
+ ASSERT_EQ(result_view, __llvm_libc::cpp::string_view(" MNOpqr&", 8));
+
+ // %c will stop on a null byte though.
+ // Only two characters remain, so this conversion reads fewer than 8.
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(29));
+ ASSERT_EQ(__llvm_libc::cpp::string_view(result, 2),
+ __llvm_libc::cpp::string_view("*(", 2));
+}
+
+// Checks %[ with three different scansets, set directly on the FormatSection
+// rather than parsed from a format string. chars_read() values are cumulative.
+TEST(LlvmLibcScanfConverterTest, ScansetConv) {
+ const char *str = "abcDEF[123] 654LKJihg";
+ char result[20];
+ __llvm_libc::scanf_core::StringReader str_reader(str);
+ __llvm_libc::scanf_core::Reader reader(&str_reader);
+
+ __llvm_libc::scanf_core::FormatSection conv;
+ conv.has_conv = true;
+ conv.conv_name = '[';
+ conv.output_ptr = result;
+
+ // This set is a-c, D-F, 1-6, and the two bracket characters.
+ __llvm_libc::cpp::bitset<256> bitset1;
+ bitset1.set_range('a', 'c');
+ bitset1.set_range('D', 'F');
+ bitset1.set_range('1', '6');
+ bitset1.set('[');
+ bitset1.set(']');
+
+ conv.scan_set = bitset1;
+
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(11));
+ ASSERT_EQ(__llvm_libc::cpp::string_view(result, 11),
+ __llvm_libc::cpp::string_view("abcDEF[123]", 11));
+
+ // The scanset conversion doesn't consume leading spaces. If it did it would
+ // return "654" here.
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::MATCHING_FAILURE));
+ ASSERT_EQ(reader.chars_read(), size_t(11));
+
+ // This set is everything except for a-g.
+ __llvm_libc::cpp::bitset<256> bitset2;
+ bitset2.set_range('a', 'g');
+ bitset2.flip();
+ conv.scan_set = bitset2;
+
+ conv.max_width = 5;
+
+ // The space is in this set, so it is read as part of the result.
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(16));
+ ASSERT_EQ(__llvm_libc::cpp::string_view(result, 5),
+ __llvm_libc::cpp::string_view(" 654L", 5));
+
+ // This read stops before the width is reached because 'g' is not in the set.
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(20));
+ ASSERT_EQ(__llvm_libc::cpp::string_view(result, 4),
+ __llvm_libc::cpp::string_view("KJih", 4));
+
+ // This set is g and '\0'.
+ __llvm_libc::cpp::bitset<256> bitset3;
+ bitset3.set('g');
+ bitset3.set('\0');
+ conv.scan_set = bitset3;
+
+ // Even though '\0' is in the scanset, it should still stop on it.
+ ASSERT_EQ(__llvm_libc::scanf_core::convert(&reader, conv),
+ static_cast<int>(__llvm_libc::scanf_core::READ_OK));
+ ASSERT_EQ(reader.chars_read(), size_t(21));
+ ASSERT_EQ(__llvm_libc::cpp::string_view(result, 1),
+ __llvm_libc::cpp::string_view("g", 1));
+}
More information about the libc-commits
mailing list