[libc-commits] [libc] 4f4752e - [libc][NFC] implement printf parser

Michael Jones via libc-commits libc-commits at lists.llvm.org
Fri Apr 8 14:21:18 PDT 2022


Author: Michael Jones
Date: 2022-04-08T14:21:13-07:00
New Revision: 4f4752ee6fd19efa9b7e623c10c5ba5861542dc8

URL: https://github.com/llvm/llvm-project/commit/4f4752ee6fd19efa9b7e623c10c5ba5861542dc8
DIFF: https://github.com/llvm/llvm-project/commit/4f4752ee6fd19efa9b7e623c10c5ba5861542dc8.diff

LOG: [libc][NFC] implement printf parser

This patch adds the sequential mode implementation of the printf parser,
as well as unit tests for it. In addition it adjusts the surrounding
files to accommodate changes in the design found in the implementation
process.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D123339

Added: 
    libc/src/stdio/printf_core/CMakeLists.txt
    libc/src/stdio/printf_core/converter.h
    libc/src/stdio/printf_core/core_structs.h
    libc/src/stdio/printf_core/parser.cpp
    libc/src/stdio/printf_core/parser.h
    libc/src/stdio/printf_core/printf_main.h
    libc/src/stdio/printf_core/writer.h
    libc/test/src/stdio/printf_core/CMakeLists.txt
    libc/test/src/stdio/printf_core/parser_test.cpp

Modified: 
    libc/src/stdio/CMakeLists.txt
    libc/test/src/stdio/CMakeLists.txt

Removed: 
    libc/src/stdio/printf_files/converter.h
    libc/src/stdio/printf_files/core_structs.h
    libc/src/stdio/printf_files/parser.h
    libc/src/stdio/printf_files/printf_main.h
    libc/src/stdio/printf_files/writer.h


################################################################################
diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt
index 2a50f798e3c98..19a3cd1c7f776 100644
--- a/libc/src/stdio/CMakeLists.txt
+++ b/libc/src/stdio/CMakeLists.txt
@@ -1,3 +1,5 @@
+add_subdirectory(printf_core)
+
 add_entrypoint_object(
   fopen
   SRCS

diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
new file mode 100644
index 0000000000000..f93022527b605
--- /dev/null
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -0,0 +1,21 @@
+
+add_header_library(
+  core_structs
+  HDRS
+    core_structs.h
+)
+
+add_object_library(
+  parser
+  SRCS
+    parser.cpp
+  HDRS
+    parser.h
+  DEPENDS
+    .core_structs
+    libc.src.__support.arg_list
+    libc.src.__support.ctype_utils
+    libc.src.__support.str_to_integer
+    libc.src.__support.CPP.bit
+
+)

diff --git a/libc/src/stdio/printf_files/converter.h b/libc/src/stdio/printf_core/converter.h
similarity index 75%
rename from libc/src/stdio/printf_files/converter.h
rename to libc/src/stdio/printf_core/converter.h
index 282296d442fd0..114a409f85296 100644
--- a/libc/src/stdio/printf_files/converter.h
+++ b/libc/src/stdio/printf_core/converter.h
@@ -6,11 +6,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_FILES_CONVERTER_H
-#define LLVM_LIBC_SRC_STDIO_PRINTF_FILES_CONVERTER_H
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CONVERTER_H
 
-#include "src/stdio/printf_files/core_structs.h"
-#include "src/stdio/printf_files/writer.h"
+#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/writer.h"
 
 #include <stddef.h>
 
@@ -32,4 +32,4 @@ class Converter {
 } // namespace printf_core
 } // namespace __llvm_libc
 
-#endif // LLVM_LIBC_SRC_STDIO_PRINTF_FILES_CONVERTER_H
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CONVERTER_H

diff --git a/libc/src/stdio/printf_files/core_structs.h b/libc/src/stdio/printf_core/core_structs.h
similarity index 51%
rename from libc/src/stdio/printf_files/core_structs.h
rename to libc/src/stdio/printf_core/core_structs.h
index 415fc9fca8e9b..fe4c96a6bbb9b 100644
--- a/libc/src/stdio/printf_files/core_structs.h
+++ b/libc/src/stdio/printf_core/core_structs.h
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_FILES_CORE_STRUCTS_H
-#define LLVM_LIBC_SRC_STDIO_PRINTF_FILES_CORE_STRUCTS_H
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CORE_STRUCTS_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CORE_STRUCTS_H
 
 #include <inttypes.h>
 #include <stddef.h>
@@ -15,33 +15,20 @@
 namespace __llvm_libc {
 namespace printf_core {
 
+// These length modifiers match the length modifiers in the format string, which
+// is why they are formatted differently from the rest of the file.
 enum class LengthModifier { hh, h, l, ll, j, z, t, L, none };
-enum VariableType : uint8_t {
-  // Types
 
-  Void = 0x00,
-  Char = 0x01,
-  // WChar = 0x02,
-  // WInt = 0x03,
-  Short = 0x04,
-  Int = 0x05,
-  Long = 0x06,
-  LLong = 0x07,
-  Intmax = 0x08,
-  Size = 0x09,
-  Ptrdiff = 0x0a,
-  Double = 0x0b,
-  LDouble = 0x0c,
+enum FormatFlags : uint8_t {
+  LEFT_JUSTIFIED = 0x01, // -
+  FORCE_SIGN = 0x02,     // +
+  SPACE_PREFIX = 0x04,   // space
+  ALTERNATE_FORM = 0x08, // #
+  LEADING_ZEROES = 0x10, // 0
 
-  // Modifiers
-
-  Signed = 0x40,
-  Pointer = 0x80,
-
-  // Masks
-
-  Type_Mask = 0x3f,
-  Modifier_Mask = 0xc,
+  // These flags come from the GNU extensions which aren't yet implemented.
+  //  group_decimals = 0x20, // '
+  //  locale_digits = 0x40,  // I
 };
 
 struct FormatSection {
@@ -51,14 +38,10 @@ struct FormatSection {
   size_t raw_len;
 
   // Format Specifier Values
-  bool left_justified;
-  bool force_sign;
-  bool space_prefix;
-  bool alt_form;
-  bool leading_zeroes;
-  LengthModifier length_modifier;
-  int min_width;
-  int precision;
+  FormatFlags flags = FormatFlags(0);
+  LengthModifier length_modifier = LengthModifier::none;
+  int min_width = 0;
+  int precision = -1;
 
   __uint128_t conv_val_raw; // Needs to be large enough to hold a long double.
   void *conv_val_ptr;
@@ -69,4 +52,4 @@ struct FormatSection {
 } // namespace printf_core
 } // namespace __llvm_libc
 
-#endif // LLVM_LIBC_SRC_STDIO_PRINTF_FILES_CORE_STRUCTS_H
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CORE_STRUCTS_H

diff --git a/libc/src/stdio/printf_core/parser.cpp b/libc/src/stdio/printf_core/parser.cpp
new file mode 100644
index 0000000000000..1541416a84c7c
--- /dev/null
+++ b/libc/src/stdio/printf_core/parser.cpp
@@ -0,0 +1,220 @@
+//===-- Format string parser implementation for printf ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "parser.h"
+
+#include "src/__support/arg_list.h"
+
+#include "src/__support/CPP/Bit.h"
+#include "src/__support/ctype_utils.h"
+#include "src/__support/str_to_integer.h"
+
+namespace __llvm_libc {
+namespace printf_core {
+
+#define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag.
+
+#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
+#define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index)
+#else
+#define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value<arg_type>()
+#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
+
+FormatSection Parser::get_next_section() {
+  FormatSection section;
+  section.raw_string = str + cur_pos;
+  size_t starting_pos = cur_pos;
+  if (str[cur_pos] == '%') {
+    // format section
+    section.has_conv = true;
+
+    ++cur_pos;
+    [[maybe_unused]] size_t conv_index = 0;
+
+    section.flags = parse_flags(&cur_pos);
+
+    // handle width
+    section.min_width = 0;
+    if (str[cur_pos] == '*') {
+      ++cur_pos;
+
+      section.min_width = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
+    } else if (internal::isdigit(str[cur_pos])) {
+      char *int_end;
+      section.min_width =
+          internal::strtointeger<int>(str + cur_pos, &int_end, 10);
+      cur_pos = int_end - str;
+    }
+    if (section.min_width < 0) {
+      section.min_width = -section.min_width;
+      section.flags =
+          static_cast<FormatFlags>(section.flags | FormatFlags::LEFT_JUSTIFIED);
+    }
+
+    // handle precision
+    section.precision = -1; // negative precisions are ignored.
+    if (str[cur_pos] == '.') {
+      ++cur_pos;
+      section.precision = 0; // if there's a . but no specified precision, the
+                             // precision is implicitly 0.
+      if (str[cur_pos] == '*') {
+        ++cur_pos;
+
+        section.precision = GET_ARG_VAL_SIMPLEST(int, parse_index(&cur_pos));
+
+      } else if (internal::isdigit(str[cur_pos])) {
+        char *int_end;
+        section.precision =
+            internal::strtointeger<int>(str + cur_pos, &int_end, 10);
+        cur_pos = int_end - str;
+      }
+    }
+
+    LengthModifier lm = parse_length_modifier(&cur_pos);
+
+    section.length_modifier = lm;
+    section.conv_name = str[cur_pos];
+    switch (str[cur_pos]) {
+    case ('%'):
+      break;
+    case ('c'):
+      section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
+      break;
+    case ('d'):
+    case ('i'):
+    case ('o'):
+    case ('x'):
+    case ('X'):
+    case ('u'):
+      switch (lm) {
+      case (LengthModifier::hh):
+      case (LengthModifier::h):
+      case (LengthModifier::none):
+        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(int, conv_index);
+        break;
+      case (LengthModifier::l):
+        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long, conv_index);
+        break;
+      case (LengthModifier::ll):
+      case (LengthModifier::L): // This isn't in the standard, but is in other
+                                // libc implementations.
+        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(long long, conv_index);
+        break;
+      case (LengthModifier::j):
+        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(intmax_t, conv_index);
+        break;
+      case (LengthModifier::z):
+        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(size_t, conv_index);
+        break;
+      case (LengthModifier::t):
+        section.conv_val_raw = GET_ARG_VAL_SIMPLEST(ptrdiff_t, conv_index);
+        break;
+      }
+      break;
+    case ('f'):
+    case ('F'):
+    case ('e'):
+    case ('E'):
+    case ('a'):
+    case ('A'):
+    case ('g'):
+    case ('G'):
+      if (lm != LengthModifier::L)
+        section.conv_val_raw =
+            bit_cast<uint64_t>(GET_ARG_VAL_SIMPLEST(double, conv_index));
+      else
+        section.conv_val_raw = bit_cast<__uint128_t>(
+            GET_ARG_VAL_SIMPLEST(long double, conv_index));
+      break;
+    case ('n'):
+    case ('p'):
+    case ('s'):
+      section.conv_val_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index);
+      break;
+    default:
+      // if the conversion is undefined, change this to a raw section.
+      section.has_conv = false;
+      break;
+    }
+    ++cur_pos;
+  } else {
+    // raw section
+    section.has_conv = false;
+    while (str[cur_pos] != '%' && str[cur_pos] != '\0')
+      ++cur_pos;
+  }
+  section.raw_len = cur_pos - starting_pos;
+  return section;
+}
+
+FormatFlags Parser::parse_flags(size_t *local_pos) {
+  bool found_flag = true;
+  FormatFlags flags = FormatFlags(0);
+  while (found_flag) {
+    switch (str[*local_pos]) {
+    case '-':
+      flags = static_cast<FormatFlags>(flags | FormatFlags::LEFT_JUSTIFIED);
+      break;
+    case '+':
+      flags = static_cast<FormatFlags>(flags | FormatFlags::FORCE_SIGN);
+      break;
+    case ' ':
+      flags = static_cast<FormatFlags>(flags | FormatFlags::SPACE_PREFIX);
+      break;
+    case '#':
+      flags = static_cast<FormatFlags>(flags | FormatFlags::ALTERNATE_FORM);
+      break;
+    case '0':
+      flags = static_cast<FormatFlags>(flags | FormatFlags::LEADING_ZEROES);
+      break;
+    default:
+      found_flag = false;
+    }
+    if (found_flag)
+      ++*local_pos;
+  }
+  return flags;
+}
+
+LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
+  switch (str[*local_pos]) {
+  case ('l'):
+    if (str[*local_pos + 1] == 'l') {
+      *local_pos += 2;
+      return LengthModifier::ll;
+    } else {
+      ++*local_pos;
+      return LengthModifier::l;
+    }
+  case ('h'):
+    if (str[cur_pos + 1] == 'h') {
+      *local_pos += 2;
+      return LengthModifier::hh;
+    } else {
+      ++*local_pos;
+      return LengthModifier::h;
+    }
+  case ('L'):
+    ++*local_pos;
+    return LengthModifier::L;
+  case ('j'):
+    ++*local_pos;
+    return LengthModifier::j;
+  case ('z'):
+    ++*local_pos;
+    return LengthModifier::z;
+  case ('t'):
+    ++*local_pos;
+    return LengthModifier::t;
+  default:
+    return LengthModifier::none;
+  }
+}
+
+} // namespace printf_core
+} // namespace __llvm_libc

diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
new file mode 100644
index 0000000000000..aa33bbd9535a9
--- /dev/null
+++ b/libc/src/stdio/printf_core/parser.h
@@ -0,0 +1,64 @@
+//===-- Format string parser for printf -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H
+
+#include "src/__support/arg_list.h"
+#include "src/stdio/printf_core/core_structs.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace printf_core {
+
+class Parser {
+  const char *__restrict str;
+
+  size_t cur_pos = 0;
+
+  internal::ArgList args_start;
+  internal::ArgList args_cur;
+  size_t args_index = 1;
+
+  // TODO: Look into object stores for optimization.
+
+public:
+  Parser(const char *__restrict new_str, internal::ArgList &args)
+      : str(new_str), args_start(args), args_cur(args) {}
+
+  // get_next_section will parse the format string until it has a fully
+  // specified format section. This can either be a raw format section with no
+  // conversion, or a format section with a conversion that has all of its
+  // variables stored in the format section.
+  FormatSection get_next_section();
+
+private:
+  // parse_flags parses the flags inside a format string. It assumes that
+  // str[*local_pos] is inside a format specifier, and parses any flags it
+  // finds. It returns a FormatFlags object containing the set of found flags
+  // arithmetically or'd together. local_pos will be moved past any flags found.
+  FormatFlags parse_flags(size_t *local_pos);
+
+  // parse_length_modifier parses the length modifier inside a format string. It
+  // assumes that str[*local_pos] is inside a format specifier. It returns a
+  // LengthModifier with the length modifier it found. It will advance local_pos
+  // after the format specifier if one is found.
+  LengthModifier parse_length_modifier(size_t *local_pos);
+
+  // get_next_arg_value gets the next value from the arg list as type T.
+  template <class T> T inline get_next_arg_value() {
+    ++args_index;
+    return args_cur.next_var<T>();
+  }
+};
+
+} // namespace printf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H

diff --git a/libc/src/stdio/printf_files/printf_main.h b/libc/src/stdio/printf_core/printf_main.h
similarity index 62%
rename from libc/src/stdio/printf_files/printf_main.h
rename to libc/src/stdio/printf_core/printf_main.h
index 84a4bf9f94015..b036835137705 100644
--- a/libc/src/stdio/printf_files/printf_main.h
+++ b/libc/src/stdio/printf_core/printf_main.h
@@ -6,22 +6,23 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_FILES_PRINTF_MAIN_H
-#define LLVM_LIBC_SRC_STDIO_PRINTF_FILES_PRINTF_MAIN_H
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PRINTF_MAIN_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PRINTF_MAIN_H
 
-#include "src/stdio/printf_files/converter.h"
-#include "src/stdio/printf_files/core_structs.h"
-#include "src/stdio/printf_files/parser.h"
-#include "src/stdio/printf_files/writer.h"
+#include "src/__support/arg_list.h"
+#include "src/stdio/printf_core/converter.h"
+#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/parser.h"
+#include "src/stdio/printf_core/writer.h"
 
-#include <stdarg.h>
 #include <stddef.h>
 
 namespace __llvm_libc {
 namespace printf_core {
 
-int printf_main(Writer *writer, const char *__restrict str, va_list vlist) {
-  Parser parser(str, &vlist);
+int printf_main(Writer *writer, const char *__restrict str,
+                internal::ArgList args) {
+  Parser parser(str, args);
   Converter converter(writer);
 
   for (FormatSection cur_section = parser.get_next_section();
@@ -38,4 +39,4 @@ int printf_main(Writer *writer, const char *__restrict str, va_list vlist) {
 } // namespace printf_core
 } // namespace __llvm_libc
 
-#endif // LLVM_LIBC_SRC_STDIO_PRINTF_FILES_PRINTF_MAIN_H
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PRINTF_MAIN_H

diff --git a/libc/src/stdio/printf_files/writer.h b/libc/src/stdio/printf_core/writer.h
similarity index 91%
rename from libc/src/stdio/printf_files/writer.h
rename to libc/src/stdio/printf_core/writer.h
index 35c1857663384..47aa05a203eba 100644
--- a/libc/src/stdio/printf_files/writer.h
+++ b/libc/src/stdio/printf_core/writer.h
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_FILES_WRITER_H
-#define LLVM_LIBC_SRC_STDIO_PRINTF_FILES_WRITER_H
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_WRITER_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_WRITER_H
 
 #include <stddef.h>
 
@@ -48,4 +48,4 @@ class Writer final {
 } // namespace printf_core
 } // namespace __llvm_libc
 
-#endif // LLVM_LIBC_SRC_STDIO_PRINTF_FILES_WRITER_H
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_WRITER_H

diff --git a/libc/src/stdio/printf_files/parser.h b/libc/src/stdio/printf_files/parser.h
deleted file mode 100644
index cfa61f0ead351..0000000000000
--- a/libc/src/stdio/printf_files/parser.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===-- Format string parser for printf -------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_FILES_PARSER_H
-#define LLVM_LIBC_SRC_STDIO_PRINTF_FILES_PARSER_H
-
-#include "src/stdio/printf_files/core_structs.h"
-
-#include <stdarg.h>
-#include <stddef.h>
-
-namespace __llvm_libc {
-namespace printf_core {
-
-// TODO: Make this a compile option.
-constexpr size_t TYPE_ARR_SIZE = 32;
-
-class Parser {
-  const char *__restrict str;
-
-  size_t cur_pos = 0;
-
-  va_list *vlist_start;
-  va_list *vlist_cur;
-  size_t vlist_index;
-
-  // TODO: Make this an optional piece.
-  VariableType type_arr[TYPE_ARR_SIZE];
-
-  // TODO: Look into object stores for optimization.
-
-public:
-  Parser(const char *__restrict str, va_list *vlist);
-
-  // get_next_section will parse the format string until it has a fully
-  // specified format section. This can either be a raw format section with no
-  // conversion, or a format section with a conversion that has all of its
-  // variables stored in the format section.
-  FormatSection get_next_section();
-
-private:
-  // get_arg_value gets the value from the vlist at index (starting at 1). This
-  // may require parsing the format string. An index of 0 is interpreted as the
-  // next value.
-  template <class T> T get_arg_value(size_t index);
-};
-
-} // namespace printf_core
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_SRC_STDIO_PRINTF_FILES_PARSER_H

diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt
index 7cb524e2b0800..45717593007b3 100644
--- a/libc/test/src/stdio/CMakeLists.txt
+++ b/libc/test/src/stdio/CMakeLists.txt
@@ -14,4 +14,6 @@ add_libc_unittest(
     libc.src.stdio.fwrite
 )
 
+add_subdirectory(printf_core)
+
 add_subdirectory(testdata)

diff --git a/libc/test/src/stdio/printf_core/CMakeLists.txt b/libc/test/src/stdio/printf_core/CMakeLists.txt
new file mode 100644
index 0000000000000..4eedc0a5bba2c
--- /dev/null
+++ b/libc/test/src/stdio/printf_core/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_libc_unittest(
+  parser_test
+  SUITE
+    libc_stdio_unittests
+  SRCS
+    parser_test.cpp
+  DEPENDS
+    libc.src.stdio.printf_core.parser
+    libc.src.__support.arg_list
+)

diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp
new file mode 100644
index 0000000000000..e57ebdb1ceac0
--- /dev/null
+++ b/libc/test/src/stdio/printf_core/parser_test.cpp
@@ -0,0 +1,290 @@
+//===-- Unittests for the printf Parser -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/CPP/Bit.h"
+#include "src/__support/arg_list.h"
+#include "src/stdio/printf_core/parser.h"
+
+#include <stdarg.h>
+
+#include "utils/UnitTest/Test.h"
+
+class LlvmLibcPrintfParserTest : public __llvm_libc::testing::Test {
+public:
+  void assert_eq_fs(__llvm_libc::printf_core::FormatSection expected,
+                    __llvm_libc::printf_core::FormatSection actual) {
+    ASSERT_EQ(expected.has_conv, actual.has_conv);
+    ASSERT_EQ(expected.raw_len, actual.raw_len);
+
+    for (size_t i = 0; i < expected.raw_len; ++i) {
+      EXPECT_EQ(expected.raw_string[i], actual.raw_string[i]);
+    }
+
+    if (expected.has_conv) {
+      ASSERT_EQ(static_cast<uint8_t>(expected.flags),
+                static_cast<uint8_t>(actual.flags));
+      ASSERT_EQ(expected.min_width, actual.min_width);
+      ASSERT_EQ(expected.precision, actual.precision);
+      ASSERT_TRUE(expected.length_modifier == actual.length_modifier);
+      ASSERT_EQ(expected.conv_name, actual.conv_name);
+
+      if (expected.conv_name == 'p' || expected.conv_name == 'n' ||
+          expected.conv_name == 's') {
+        ASSERT_EQ(expected.conv_val_ptr, actual.conv_val_ptr);
+      } else if (expected.conv_name != '%') {
+        ASSERT_EQ(expected.conv_val_raw, actual.conv_val_raw);
+      }
+    }
+  }
+};
+
+void init(const char *__restrict str, ...) {
+  va_list vlist;
+  va_start(vlist, str);
+  __llvm_libc::internal::ArgList v(vlist);
+  va_end(vlist);
+
+  __llvm_libc::printf_core::Parser parser(str, v);
+}
+
+void evaluate(__llvm_libc::printf_core::FormatSection *format_arr,
+              const char *__restrict str, ...) {
+  va_list vlist;
+  va_start(vlist, str);
+  __llvm_libc::internal::ArgList v(vlist);
+  va_end(vlist);
+
+  __llvm_libc::printf_core::Parser parser(str, v);
+
+  for (auto cur_section = parser.get_next_section(); cur_section.raw_len > 0;
+       cur_section = parser.get_next_section()) {
+    *format_arr = cur_section;
+    ++format_arr;
+  }
+}
+
+TEST_F(LlvmLibcPrintfParserTest, Constructor) { init("test", 1, 2); }
+
+TEST_F(LlvmLibcPrintfParserTest, EvalRaw) {
+  __llvm_libc::printf_core::FormatSection format_arr[10];
+  const char *str = "test";
+  evaluate(format_arr, str);
+
+  __llvm_libc::printf_core::FormatSection expected;
+  expected.has_conv = false;
+  expected.raw_len = 4;
+  expected.raw_string = str;
+
+  assert_eq_fs(expected, format_arr[0]);
+}
+
+TEST_F(LlvmLibcPrintfParserTest, EvalSimple) {
+  __llvm_libc::printf_core::FormatSection format_arr[10];
+  const char *str = "test %% test";
+  evaluate(format_arr, str);
+
+  __llvm_libc::printf_core::FormatSection expected0, expected1, expected2;
+  expected0.has_conv = false;
+  expected0.raw_len = 5;
+  expected0.raw_string = str;
+
+  assert_eq_fs(expected0, format_arr[0]);
+
+  expected1.has_conv = true;
+  expected1.raw_len = 2;
+  expected1.raw_string = str + 5;
+  expected1.conv_name = '%';
+
+  assert_eq_fs(expected1, format_arr[1]);
+
+  expected2.has_conv = false;
+  expected2.raw_len = 5;
+  expected2.raw_string = str + 7;
+
+  assert_eq_fs(expected2, format_arr[2]);
+}
+
+TEST_F(LlvmLibcPrintfParserTest, EvalOneArg) {
+  __llvm_libc::printf_core::FormatSection format_arr[10];
+  const char *str = "%d";
+  int arg1 = 12345;
+  evaluate(format_arr, str, arg1);
+
+  __llvm_libc::printf_core::FormatSection expected;
+  expected.has_conv = true;
+  expected.raw_len = 2;
+  expected.raw_string = str;
+  expected.conv_val_raw = arg1;
+  expected.conv_name = 'd';
+
+  assert_eq_fs(expected, format_arr[0]);
+}
+
+TEST_F(LlvmLibcPrintfParserTest, EvalOneArgWithFlags) {
+  __llvm_libc::printf_core::FormatSection format_arr[10];
+  const char *str = "%+-0 #d";
+  int arg1 = 12345;
+  evaluate(format_arr, str, arg1);
+
+  __llvm_libc::printf_core::FormatSection expected;
+  expected.has_conv = true;
+  expected.raw_len = 7;
+  expected.raw_string = str;
+  expected.flags = static_cast<__llvm_libc::printf_core::FormatFlags>(
+      __llvm_libc::printf_core::FormatFlags::FORCE_SIGN |
+      __llvm_libc::printf_core::FormatFlags::LEFT_JUSTIFIED |
+      __llvm_libc::printf_core::FormatFlags::LEADING_ZEROES |
+      __llvm_libc::printf_core::FormatFlags::SPACE_PREFIX |
+      __llvm_libc::printf_core::FormatFlags::ALTERNATE_FORM);
+  expected.conv_val_raw = arg1;
+  expected.conv_name = 'd';
+
+  assert_eq_fs(expected, format_arr[0]);
+}
+
+TEST_F(LlvmLibcPrintfParserTest, EvalOneArgWithWidth) {
+  __llvm_libc::printf_core::FormatSection format_arr[10];
+  const char *str = "%12d";
+  int arg1 = 12345;
+  evaluate(format_arr, str, arg1);
+
+  __llvm_libc::printf_core::FormatSection expected;
+  expected.has_conv = true;
+  expected.raw_len = 4;
+  expected.raw_string = str;
+  expected.min_width = 12;
+  expected.conv_val_raw = arg1;
+  expected.conv_name = 'd';
+
+  assert_eq_fs(expected, format_arr[0]);
+}
+
+TEST_F(LlvmLibcPrintfParserTest, EvalOneArgWithPrecision) {
+  __llvm_libc::printf_core::FormatSection format_arr[10];
+  const char *str = "%.34d";
+  int arg1 = 12345;
+  evaluate(format_arr, str, arg1);
+
+  __llvm_libc::printf_core::FormatSection expected;
+  expected.has_conv = true;
+  expected.raw_len = 5;
+  expected.raw_string = str;
+  expected.precision = 34;
+  expected.conv_val_raw = arg1;
+  expected.conv_name = 'd';
+
+  assert_eq_fs(expected, format_arr[0]);
+}
+
+TEST_F(LlvmLibcPrintfParserTest, EvalOneArgWithTrivialPrecision) {
+  __llvm_libc::printf_core::FormatSection format_arr[10];
+  const char *str = "%.d";
+  int arg1 = 12345;
+  evaluate(format_arr, str, arg1);
+
+  __llvm_libc::printf_core::FormatSection expected;
+  expected.has_conv = true;
+  expected.raw_len = 3;
+  expected.raw_string = str;
+  expected.precision = 0;
+  expected.conv_val_raw = arg1;
+  expected.conv_name = 'd';
+
+  assert_eq_fs(expected, format_arr[0]);
+}
+
+TEST_F(LlvmLibcPrintfParserTest, EvalOneArgWithShortLengthModifier) {
+  __llvm_libc::printf_core::FormatSection format_arr[10];
+  const char *str = "%hd";
+  int arg1 = 12345;
+  evaluate(format_arr, str, arg1);
+
+  __llvm_libc::printf_core::FormatSection expected;
+  expected.has_conv = true;
+  expected.raw_len = 3;
+  expected.raw_string = str;
+  expected.length_modifier = __llvm_libc::printf_core::LengthModifier::h;
+  expected.conv_val_raw = arg1;
+  expected.conv_name = 'd';
+
+  assert_eq_fs(expected, format_arr[0]);
+}
+
+TEST_F(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) {
+  __llvm_libc::printf_core::FormatSection format_arr[10];
+  const char *str = "%lld";
+  int arg1 = 12345;
+  evaluate(format_arr, str, arg1);
+
+  __llvm_libc::printf_core::FormatSection expected;
+  expected.has_conv = true;
+  expected.raw_len = 4;
+  expected.raw_string = str;
+  expected.length_modifier = __llvm_libc::printf_core::LengthModifier::ll;
+  expected.conv_val_raw = arg1;
+  expected.conv_name = 'd';
+
+  assert_eq_fs(expected, format_arr[0]);
+}
+
+TEST_F(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) {
+  __llvm_libc::printf_core::FormatSection format_arr[10];
+  const char *str = "% -056.78jd";
+  int arg1 = 12345;
+  evaluate(format_arr, str, arg1);
+
+  __llvm_libc::printf_core::FormatSection expected;
+  expected.has_conv = true;
+  expected.raw_len = 11;
+  expected.raw_string = str;
+  expected.flags = static_cast<__llvm_libc::printf_core::FormatFlags>(
+      __llvm_libc::printf_core::FormatFlags::LEFT_JUSTIFIED |
+      __llvm_libc::printf_core::FormatFlags::LEADING_ZEROES |
+      __llvm_libc::printf_core::FormatFlags::SPACE_PREFIX);
+  expected.min_width = 56;
+  expected.precision = 78;
+  expected.length_modifier = __llvm_libc::printf_core::LengthModifier::j;
+  expected.conv_val_raw = arg1;
+  expected.conv_name = 'd';
+
+  assert_eq_fs(expected, format_arr[0]);
+}
+
+TEST_F(LlvmLibcPrintfParserTest, EvalThreeArgs) {
+  __llvm_libc::printf_core::FormatSection format_arr[10];
+  const char *str = "%d%f%s";
+  int arg1 = 12345;
+  double arg2 = 123.45;
+  const char *arg3 = "12345";
+  evaluate(format_arr, str, arg1, arg2, arg3);
+
+  __llvm_libc::printf_core::FormatSection expected0, expected1, expected2;
+  expected0.has_conv = true;
+  expected0.raw_len = 2;
+  expected0.raw_string = str;
+  expected0.conv_val_raw = arg1;
+  expected0.conv_name = 'd';
+
+  assert_eq_fs(expected0, format_arr[0]);
+
+  expected1.has_conv = true;
+  expected1.raw_len = 2;
+  expected1.raw_string = str + 2;
+  expected1.conv_val_raw = __llvm_libc::bit_cast<uint64_t>(arg2);
+  expected1.conv_name = 'f';
+
+  assert_eq_fs(expected1, format_arr[1]);
+
+  expected2.has_conv = true;
+  expected2.raw_len = 2;
+  expected2.raw_string = str + 4;
+  expected2.conv_val_ptr = const_cast<char *>(arg3);
+  expected2.conv_name = 's';
+
+  assert_eq_fs(expected2, format_arr[2]);
+}


        


More information about the libc-commits mailing list