[libc-commits] [libc] [libc] Template the printf / scanf parser class (PR #66277)

via libc-commits libc-commits at lists.llvm.org
Wed Sep 13 12:14:02 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libc
            
<details>
<summary>Changes</summary>
Summary:
The parser class for stdio currently accepts different argument
providers. In-tree this is only used for a fuzzer test, however, the
proposed implementation of the GPU handling of printf / scanf will
require custom argument handlers. This makes the current approach of
using a preprocessor macro messier. This path porposed folding this
logic into a template instantiation. The downside to this is that
because the implementation of the parser class is placed into an
implementation file we need to manually instantiate the needed templates
which will slightly bloat binary size. Alternatively we could remove the
implementation file, or key off of the `libc` external packaging macro
so it is not present in the installed version.

--
Full diff: https://github.com/llvm/llvm-project/pull/66277.diff

11 Files Affected:

- (modified) libc/fuzzing/stdio/CMakeLists.txt (+1-3) 
- (modified) libc/fuzzing/stdio/printf_parser_fuzz.cpp (+2-5) 
- (modified) libc/src/stdio/printf_core/CMakeLists.txt (-20) 
- (modified) libc/src/stdio/printf_core/parser.cpp (+21-12) 
- (modified) libc/src/stdio/printf_core/parser.h (+2-8) 
- (modified) libc/src/stdio/printf_core/printf_main.cpp (+1-1) 
- (modified) libc/src/stdio/scanf_core/parser.cpp (+11-5) 
- (modified) libc/src/stdio/scanf_core/parser.h (+4-4) 
- (modified) libc/src/stdio/scanf_core/scanf_main.cpp (+1-1) 
- (modified) libc/test/src/stdio/printf_core/parser_test.cpp (+5-4) 
- (modified) libc/test/src/stdio/scanf_core/parser_test.cpp (+4-3) 


<pre>
diff --git a/libc/fuzzing/stdio/CMakeLists.txt b/libc/fuzzing/stdio/CMakeLists.txt
index bd7e38bc1401e56..22de67d42747fa9 100644
--- a/libc/fuzzing/stdio/CMakeLists.txt
+++ b/libc/fuzzing/stdio/CMakeLists.txt
@@ -3,9 +3,7 @@ add_libc_fuzzer(
   SRCS
     printf_parser_fuzz.cpp
   DEPENDS
-    libc.src.stdio.printf_core.mock_parser
-  COMPILE_OPTIONS
-    -DLIBC_COPT_MOCK_ARG_LIST
+    libc.src.stdio.printf_core.parser
 )
 
 add_libc_fuzzer(
diff --git a/libc/fuzzing/stdio/printf_parser_fuzz.cpp b/libc/fuzzing/stdio/printf_parser_fuzz.cpp
index 05cd616ca48b0e4..86f8c1e0a55f818 100644
--- a/libc/fuzzing/stdio/printf_parser_fuzz.cpp
+++ b/libc/fuzzing/stdio/printf_parser_fuzz.cpp
@@ -10,10 +10,6 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#ifndef LIBC_COPT_MOCK_ARG_LIST
-#error The printf Parser Fuzzer must be compiled with LIBC_COPT_MOCK_ARG_LIST, and the parser itself must also be compiled with that option when it&#x27;s linked against the fuzzer.
-#endif
-
 #include "src/__support/arg_list.h"
 #include "src/stdio/printf_core/parser.h"
 
@@ -37,7 +33,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
 
   auto mock_arg_list = internal::MockArgList();
 
-  auto parser = printf_core::Parser(in_str, mock_arg_list);
+  auto parser =
+      printf_core::Parser<internal::MockArgList>(in_str, mock_arg_list);
 
   int str_percent_count = 0;
 
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index 0b3766b55e8d4b4..5ac657ee140f71c 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -26,26 +26,6 @@ add_object_library(
     libc.src.__support.common
 )
 
-add_object_library(
-  mock_parser
-  SRCS
-    parser.cpp
-  HDRS
-    parser.h
-  DEPENDS
-    .core_structs
-    libc.src.__support.arg_list
-    libc.src.__support.ctype_utils
-    libc.src.__support.str_to_integer
-    libc.src.__support.CPP.bit
-    libc.src.__support.CPP.optional
-    libc.src.__support.CPP.string_view
-    libc.src.__support.CPP.type_traits
-    libc.src.__support.common
-  COMPILE_OPTIONS
-    -DLIBC_COPT_MOCK_ARG_LIST
-)
-
 add_object_library(
   writer
   SRCS
diff --git a/libc/src/stdio/printf_core/parser.cpp b/libc/src/stdio/printf_core/parser.cpp
index 6b2c174c3f23329..7ae55e6b01a263b 100644
--- a/libc/src/stdio/printf_core/parser.cpp
+++ b/libc/src/stdio/printf_core/parser.cpp
@@ -50,7 +50,8 @@ template <typename T> using int_type_of_v = typename int_type_of<T>::type;
   dst = cpp::bit_cast<int_type_of_v<arg_type>>(get_next_arg_value<arg_type>())
 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
 
-FormatSection Parser::get_next_section() {
+template <typename ArgProvider>
+FormatSection Parser<ArgProvider>::get_next_section() {
   FormatSection section;
   size_t starting_pos = cur_pos;
   if (str[cur_pos] == &#x27;%&#x27;) {
@@ -196,7 +197,8 @@ FormatSection Parser::get_next_section() {
   return section;
 }
 
-FormatFlags Parser::parse_flags(size_t *local_pos) {
+template <typename ArgProvider>
+FormatFlags Parser<ArgProvider>::parse_flags(size_t *local_pos) {
   bool found_flag = true;
   FormatFlags flags = FormatFlags(0);
   while (found_flag) {
@@ -225,7 +227,8 @@ FormatFlags Parser::parse_flags(size_t *local_pos) {
   return flags;
 }
 
-LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
+template <typename ArgProvider>
+LengthModifier Parser<ArgProvider>::parse_length_modifier(size_t *local_pos) {
   switch (str[*local_pos]) {
   case (&#x27;l&#x27;):
     if (str[*local_pos + 1] == &#x27;l&#x27;) {
@@ -266,7 +269,8 @@ LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
 
 #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
 
-size_t Parser::parse_index(size_t *local_pos) {
+template <typename ArgProvider>
+size_t Parser<ArgProvider>::parse_index(size_t *local_pos) {
   if (internal::isdigit(str[*local_pos])) {
     auto result = internal::strtointeger<int>(str + *local_pos, 10);
     size_t index = result.value;
@@ -278,7 +282,8 @@ size_t Parser::parse_index(size_t *local_pos) {
   return 0;
 }
 
-TypeDesc Parser::get_type_desc(size_t index) {
+template <typename ArgProvider>
+TypeDesc Parser<ArgProvider>::get_type_desc(size_t index) {
   // index mode is assumed, and the indicies start at 1, so an index
   // of 0 is invalid.
   size_t local_pos = 0;
@@ -417,7 +422,8 @@ TypeDesc Parser::get_type_desc(size_t index) {
   return type_desc_from_type<void>();
 }
 
-bool Parser::args_to_index(size_t index) {
+template <typename ArgProvider>
+bool Parser<ArgProvider>::args_to_index(size_t index) {
   if (args_index > index) {
     args_index = 1;
     args_cur = args_start;
@@ -439,21 +445,21 @@ bool Parser::args_to_index(size_t index) {
       return false;
 
     if (cur_type_desc == type_desc_from_type<uint32_t>())
-      args_cur.next_var<uint32_t>();
+      args_cur.template next_var<uint32_t>();
     else if (cur_type_desc == type_desc_from_type<uint64_t>())
-      args_cur.next_var<uint64_t>();
+      args_cur.template next_var<uint64_t>();
 #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
     // Floating point numbers are stored separately from the other arguments.
     else if (cur_type_desc == type_desc_from_type<double>())
-      args_cur.next_var<double>();
+      args_cur.template next_var<double>();
     else if (cur_type_desc == type_desc_from_type<long double>())
-      args_cur.next_var<long double>();
+      args_cur.template next_var<long double>();
 #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
     // pointers may be stored separately from normal values.
     else if (cur_type_desc == type_desc_from_type<void *>())
-      args_cur.next_var<void *>();
+      args_cur.template next_var<void *>();
     else
-      args_cur.next_var<uint32_t>();
+      args_cur.template next_var<uint32_t>();
 
     ++args_index;
   }
@@ -462,5 +468,8 @@ bool Parser::args_to_index(size_t index) {
 
 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
 
+template class Parser<internal::ArgList>;
+template class Parser<internal::MockArgList>;
+
 } // namespace printf_core
 } // namespace __llvm_libc
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index a376af99ad8d7f7..e35d9c1524b9b88 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -21,13 +21,7 @@
 namespace __llvm_libc {
 namespace printf_core {
 
-#ifndef LIBC_COPT_MOCK_ARG_LIST
-using ArgProvider = internal::ArgList;
-#else  // not defined LIBC_COPT_MOCK_ARG_LIST
-using ArgProvider = internal::MockArgList;
-#endif // LIBC_COPT_MOCK_ARG_LIST
-
-class Parser {
+template <typename ArgProvider> class Parser {
   const char *__restrict str;
 
   size_t cur_pos = 0;
@@ -84,7 +78,7 @@ class Parser {
 
   // get_next_arg_value gets the next value from the arg list as type T.
   template <class T> LIBC_INLINE T get_next_arg_value() {
-    return args_cur.next_var<T>();
+    return args_cur.template next_var<T>();
   }
 
   //----------------------------------------------------
diff --git a/libc/src/stdio/printf_core/printf_main.cpp b/libc/src/stdio/printf_core/printf_main.cpp
index b7684cdf1e74fc0..60d1e210eee4cf8 100644
--- a/libc/src/stdio/printf_core/printf_main.cpp
+++ b/libc/src/stdio/printf_core/printf_main.cpp
@@ -21,7 +21,7 @@ namespace printf_core {
 
 int printf_main(Writer *writer, const char *__restrict str,
                 internal::ArgList &args) {
-  Parser parser(str, args);
+  Parser<internal::ArgList> parser(str, args);
   int result = 0;
   for (FormatSection cur_section = parser.get_next_section();
        !cur_section.raw_string.empty();
diff --git a/libc/src/stdio/scanf_core/parser.cpp b/libc/src/stdio/scanf_core/parser.cpp
index 44e853c8a8de8fe..00836bd827fd274 100644
--- a/libc/src/stdio/scanf_core/parser.cpp
+++ b/libc/src/stdio/scanf_core/parser.cpp
@@ -28,7 +28,8 @@ namespace scanf_core {
 #define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value<arg_type>()
 #endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE
 
-FormatSection Parser::get_next_section() {
+template <typename ArgProvider>
+FormatSection Parser<ArgProvider>::get_next_section() {
   FormatSection section;
   size_t starting_pos = cur_pos;
   if (str[cur_pos] == &#x27;%&#x27;) {
@@ -152,7 +153,8 @@ FormatSection Parser::get_next_section() {
   return section;
 }
 
-LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
+template <typename ArgProvider>
+LengthModifier Parser<ArgProvider>::parse_length_modifier(size_t *local_pos) {
   switch (str[*local_pos]) {
   case (&#x27;l&#x27;):
     if (str[*local_pos + 1] == &#x27;l&#x27;) {
@@ -193,7 +195,8 @@ LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
 
 #ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE
 
-size_t Parser::parse_index(size_t *local_pos) {
+template <typename ArgProvider>
+size_t Parser<ArgProvider>::parse_index(size_t *local_pos) {
   if (internal::isdigit(str[*local_pos])) {
     auto result = internal::strtointeger<int>(str + *local_pos, 10);
     size_t index = result.value;
@@ -205,7 +208,8 @@ size_t Parser::parse_index(size_t *local_pos) {
   return 0;
 }
 
-void Parser::args_to_index(size_t index) {
+template <typename ArgProvider>
+void Parser<ArgProvider>::args_to_index(size_t index) {
   if (args_index > index) {
     args_index = 1;
     args_cur = args_start;
@@ -214,12 +218,14 @@ void Parser::args_to_index(size_t index) {
   while (args_index < index) {
     // Since all arguments must be pointers, we can just read all of them as
     // void * and not worry about type issues.
-    args_cur.next_var<void *>();
+    args_cur.template next_var<void *>();
     ++args_index;
   }
 }
 
 #endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE
 
+template class Parser<internal::ArgList>;
+
 } // namespace scanf_core
 } // namespace __llvm_libc
diff --git a/libc/src/stdio/scanf_core/parser.h b/libc/src/stdio/scanf_core/parser.h
index 4b9f0b4dd95b94b..3243db9f1b0c05a 100644
--- a/libc/src/stdio/scanf_core/parser.h
+++ b/libc/src/stdio/scanf_core/parser.h
@@ -19,17 +19,17 @@
 namespace __llvm_libc {
 namespace scanf_core {
 
-class Parser {
+template <typename ArgProvider> class Parser {
   const char *__restrict str;
 
   size_t cur_pos = 0;
-  internal::ArgList args_cur;
+  ArgProvider args_cur;
 
 #ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE
   // args_start stores the start of the va_args, which is used when a previous
   // argument is needed. In that case, we have to read the arguments from the
   // beginning since they don&#x27;t support reading backwards.
-  internal::ArgList args_start;
+  ArgProvider args_start;
   size_t args_index = 1;
 #endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE
 
@@ -57,7 +57,7 @@ class Parser {
 
   // get_next_arg_value gets the next value from the arg list as type T.
   template <class T> LIBC_INLINE T get_next_arg_value() {
-    return args_cur.next_var<T>();
+    return args_cur.template next_var<T>();
   }
 
   //----------------------------------------------------
diff --git a/libc/src/stdio/scanf_core/scanf_main.cpp b/libc/src/stdio/scanf_core/scanf_main.cpp
index 5a79d2e624ab0aa..e7e41cbe899d720 100644
--- a/libc/src/stdio/scanf_core/scanf_main.cpp
+++ b/libc/src/stdio/scanf_core/scanf_main.cpp
@@ -21,7 +21,7 @@ namespace scanf_core {
 
 int scanf_main(Reader *reader, const char *__restrict str,
                internal::ArgList &args) {
-  Parser parser(str, args);
+  Parser<internal::ArgList> parser(str, args);
   int ret_val = READ_OK;
   int conversions = 0;
   for (FormatSection cur_section = parser.get_next_section();
diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp
index 61f8c7cbe580e74..910b611f5194939 100644
--- a/libc/test/src/stdio/printf_core/parser_test.cpp
+++ b/libc/test/src/stdio/printf_core/parser_test.cpp
@@ -17,24 +17,25 @@
 #include "test/UnitTest/Test.h"
 
 using __llvm_libc::cpp::string_view;
+using __llvm_libc::internal::ArgList;
 
 void init(const char *__restrict str, ...) {
   va_list vlist;
   va_start(vlist, str);
-  __llvm_libc::internal::ArgList v(vlist);
+  ArgList v(vlist);
   va_end(vlist);
 
-  __llvm_libc::printf_core::Parser parser(str, v);
+  __llvm_libc::printf_core::Parser<ArgList> parser(str, v);
 }
 
 void evaluate(__llvm_libc::printf_core::FormatSection *format_arr,
               const char *__restrict str, ...) {
   va_list vlist;
   va_start(vlist, str);
-  __llvm_libc::internal::ArgList v(vlist);
+  ArgList v(vlist);
   va_end(vlist);
 
-  __llvm_libc::printf_core::Parser parser(str, v);
+  __llvm_libc::printf_core::Parser<ArgList> parser(str, v);
 
   for (auto cur_section = parser.get_next_section();
        !cur_section.raw_string.empty();
diff --git a/libc/test/src/stdio/scanf_core/parser_test.cpp b/libc/test/src/stdio/scanf_core/parser_test.cpp
index 2ccaf84c6755233..b1f9efa0f8a2bc7 100644
--- a/libc/test/src/stdio/scanf_core/parser_test.cpp
+++ b/libc/test/src/stdio/scanf_core/parser_test.cpp
@@ -18,14 +18,15 @@
 #include "test/UnitTest/Test.h"
 
 using __llvm_libc::cpp::string_view;
+using __llvm_libc::internal::ArgList;
 
 void init(const char *__restrict str, ...) {
   va_list vlist;
   va_start(vlist, str);
-  __llvm_libc::internal::ArgList v(vlist);
+  ArgList v(vlist);
   va_end(vlist);
 
-  __llvm_libc::scanf_core::Parser parser(str, v);
+  __llvm_libc::scanf_core::Parser<ArgList> parser(str, v);
 }
 
 void evaluate(__llvm_libc::scanf_core::FormatSection *format_arr,
@@ -35,7 +36,7 @@ void evaluate(__llvm_libc::scanf_core::FormatSection *format_arr,
   __llvm_libc::internal::ArgList v(vlist);
   va_end(vlist);
 
-  __llvm_libc::scanf_core::Parser parser(str, v);
+  __llvm_libc::scanf_core::Parser<ArgList> parser(str, v);
 
   for (auto cur_section = parser.get_next_section();
        !cur_section.raw_string.empty();
</pre>
</details>


https://github.com/llvm/llvm-project/pull/66277


More information about the libc-commits mailing list