[libc-commits] [libc] 945fa67 - [libc][NFC] add index mode to printf parser
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Fri May 6 12:06:13 PDT 2022
Author: Michael Jones
Date: 2022-05-06T12:06:08-07:00
New Revision: 945fa672c60d265fa0dc13f5a5445768945feb0a
URL: https://github.com/llvm/llvm-project/commit/945fa672c60d265fa0dc13f5a5445768945feb0a
DIFF: https://github.com/llvm/llvm-project/commit/945fa672c60d265fa0dc13f5a5445768945feb0a.diff
LOG: [libc][NFC] add index mode to printf parser
This patch is a followup to the previous patch which implemented the
main printf parsing logic as well as sequential mode. This patch adds
index mode.
Reviewed By: sivachandra
Differential Revision: https://reviews.llvm.org/D123424
Added:
Modified:
libc/src/stdio/printf_core/CMakeLists.txt
libc/src/stdio/printf_core/parser.cpp
libc/src/stdio/printf_core/parser.h
libc/test/src/stdio/printf_core/parser_test.cpp
Removed:
################################################################################
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index d167a0191c840..965bbcd14da89 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -17,7 +17,7 @@ add_object_library(
libc.src.__support.ctype_utils
libc.src.__support.str_to_integer
libc.src.__support.CPP.bit
-
+ libc.src.string.memory_utils.memset_implementation
)
add_header_library(
diff --git a/libc/src/stdio/printf_core/parser.cpp b/libc/src/stdio/printf_core/parser.cpp
index 1541416a84c7c..a373f38536b5b 100644
--- a/libc/src/stdio/printf_core/parser.cpp
+++ b/libc/src/stdio/printf_core/parser.cpp
@@ -6,6 +6,8 @@
//
//===----------------------------------------------------------------------===//
+// #define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag.
+
#include "parser.h"
#include "src/__support/arg_list.h"
@@ -17,8 +19,6 @@
namespace __llvm_libc {
namespace printf_core {
-#define LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE 1 // This will be a compile flag.
-
#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
#define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index)
#else
@@ -36,6 +36,10 @@ FormatSection Parser::get_next_section() {
++cur_pos;
[[maybe_unused]] size_t conv_index = 0;
+#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
+ conv_index = parse_index(&cur_pos);
+#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
+
section.flags = parse_flags(&cur_pos);
// handle width
@@ -192,7 +196,7 @@ LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
return LengthModifier::l;
}
case ('h'):
- if (str[cur_pos + 1] == 'h') {
+ if (str[*local_pos + 1] == 'h') {
*local_pos += 2;
return LengthModifier::hh;
} else {
@@ -216,5 +220,190 @@ LengthModifier Parser::parse_length_modifier(size_t *local_pos) {
}
}
+//----------------------------------------------------
+// INDEX MODE ONLY FUNCTIONS AFTER HERE:
+//----------------------------------------------------
+
+#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
+
+size_t Parser::parse_index(size_t *local_pos) {
+ if (internal::isdigit(str[*local_pos])) {
+ char *int_end;
+ size_t index =
+ internal::strtointeger<size_t>(str + *local_pos, &int_end, 10);
+ if (int_end[0] != '$')
+ return 0;
+ *local_pos = 1 + int_end - str;
+ return index;
+ }
+ return 0;
+}
+
+Parser::TypeDesc Parser::get_type_desc(size_t index) {
+ // index mode is assumed, and the indicies start at 1, so an index
+ // of 0 is invalid.
+ size_t local_pos = 0;
+
+ while (str[local_pos]) {
+ if (str[local_pos] == '%') {
+ ++local_pos;
+
+ size_t conv_index = parse_index(&local_pos);
+
+ // the flags aren't relevant for this situation, but I need to skip past
+ // them so they're parsed but the result is discarded.
+ parse_flags(&local_pos);
+
+ // handle width
+ if (str[local_pos] == '*') {
+ ++local_pos;
+
+ size_t width_index = parse_index(&local_pos);
+ set_type_desc(width_index, TYPE_DESC<int>);
+ if (width_index == index)
+ return TYPE_DESC<int>;
+
+ } else if (internal::isdigit(str[local_pos])) {
+ while (internal::isdigit(str[local_pos]))
+ ++local_pos;
+ }
+
+ // handle precision
+ if (str[local_pos] == '.') {
+ ++local_pos;
+ if (str[local_pos] == '*') {
+ ++local_pos;
+
+ size_t precision_index = parse_index(&local_pos);
+ set_type_desc(precision_index, TYPE_DESC<int>);
+ if (precision_index == index)
+ return TYPE_DESC<int>;
+
+ } else if (internal::isdigit(str[local_pos])) {
+ while (internal::isdigit(str[local_pos]))
+ ++local_pos;
+ }
+ }
+
+ LengthModifier lm = parse_length_modifier(&local_pos);
+
+ // if we don't have an index for this conversion, then its position is
+ // unknown and all this information is irrelevant. The rest of this logic
+ // has been for skipping past this conversion properly to avoid
+ // weirdness with %%.
+ if (conv_index == 0) {
+ ++local_pos;
+ continue;
+ }
+
+ TypeDesc conv_size = TYPE_DESC<void>;
+ switch (str[local_pos]) {
+ case ('%'):
+ conv_size = TYPE_DESC<void>;
+ break;
+ case ('c'):
+ conv_size = TYPE_DESC<int>;
+ break;
+ case ('d'):
+ case ('i'):
+ case ('o'):
+ case ('x'):
+ case ('X'):
+ case ('u'):
+ switch (lm) {
+ case (LengthModifier::hh):
+ case (LengthModifier::h):
+ case (LengthModifier::none):
+ conv_size = TYPE_DESC<int>;
+ break;
+ case (LengthModifier::l):
+ conv_size = TYPE_DESC<long>;
+ break;
+ case (LengthModifier::ll):
+ case (LengthModifier::L): // This isn't in the standard, but is in other
+ // libc implementations.
+ conv_size = TYPE_DESC<long long>;
+ break;
+ case (LengthModifier::j):
+ conv_size = TYPE_DESC<intmax_t>;
+ break;
+ case (LengthModifier::z):
+ conv_size = TYPE_DESC<size_t>;
+ break;
+ case (LengthModifier::t):
+ conv_size = TYPE_DESC<ptr
diff _t>;
+ break;
+ }
+ break;
+ case ('f'):
+ case ('F'):
+ case ('e'):
+ case ('E'):
+ case ('a'):
+ case ('A'):
+ case ('g'):
+ case ('G'):
+ if (lm != LengthModifier::L)
+ conv_size = TYPE_DESC<double>;
+ else
+ conv_size = TYPE_DESC<long double>;
+ break;
+ case ('n'):
+ case ('p'):
+ case ('s'):
+ conv_size = TYPE_DESC<void *>;
+ break;
+ default:
+ conv_size = TYPE_DESC<int>;
+ break;
+ }
+
+ set_type_desc(conv_index, conv_size);
+ if (conv_index == index)
+ return conv_size;
+ }
+ ++local_pos;
+ }
+
+ // If there is no size for the requested index, then just guess that it's an
+ // int.
+ return TYPE_DESC<int>;
+}
+
+void Parser::args_to_index(size_t index) {
+ if (args_index > index) {
+ args_index = 1;
+ args_cur = args_start;
+ }
+
+ while (args_index < index) {
+ Parser::TypeDesc cur_type_desc = TYPE_DESC<void>;
+ if (args_index <= DESC_ARR_LEN)
+ cur_type_desc = desc_arr[args_index - 1];
+
+ if (cur_type_desc == TYPE_DESC<void>)
+ cur_type_desc = get_type_desc(args_index);
+
+ if (cur_type_desc == TYPE_DESC<uint32_t>)
+ args_cur.next_var<uint32_t>();
+ else if (cur_type_desc == TYPE_DESC<uint64_t>)
+ args_cur.next_var<uint64_t>();
+ // Floating point numbers are stored separately from the other arguments.
+ else if (cur_type_desc == TYPE_DESC<double>)
+ args_cur.next_var<double>();
+ else if (cur_type_desc == TYPE_DESC<long double>)
+ args_cur.next_var<long double>();
+ // pointers may be stored separately from normal values.
+ else if (cur_type_desc == TYPE_DESC<void *>)
+ args_cur.next_var<void *>();
+ else
+ args_cur.next_var<uint32_t>();
+
+ ++args_index;
+ }
+}
+
+#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
+
} // namespace printf_core
} // namespace __llvm_libc
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index aa33bbd9535a9..25dd0f65e8171 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -11,6 +11,7 @@
#include "src/__support/arg_list.h"
#include "src/stdio/printf_core/core_structs.h"
+#include "src/string/memory_utils/memset_implementations.h"
#include <stddef.h>
@@ -21,16 +22,50 @@ class Parser {
const char *__restrict str;
size_t cur_pos = 0;
+ internal::ArgList args_cur;
+#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
+ // args_start stores the start of the va_args, which is allows getting the
+ // value of arguments that have already been passed. args_index is tracked so
+ // that we know which argument args_cur is on.
internal::ArgList args_start;
- internal::ArgList args_cur;
size_t args_index = 1;
+ enum PrimaryType : uint8_t { Integer = 0, Float = 1, Pointer = 2 };
+
+ // TypeDesc stores the information about a type that is relevant to printf in
+ // a relatively compact manner.
+ struct TypeDesc {
+ uint8_t size;
+ PrimaryType primary_type;
+ constexpr bool operator==(const TypeDesc &other) const {
+ return (size == other.size) && (primary_type == other.primary_type);
+ }
+ };
+ // TODO: Make this size configurable via a compile option.
+ static constexpr size_t DESC_ARR_LEN = 32;
+ // desc_arr stores the sizes of the variables in the ArgList. This is used in
+ // index mode to reduce repeated string parsing. The sizes are stored as
+ // TypeDesc objects, which store the size as well as minimal type information.
+ // This is necessary because some systems separate the floating point and
+ // integer values in va_args.
+ TypeDesc desc_arr[DESC_ARR_LEN];
+
// TODO: Look into object stores for optimization.
+#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
+
public:
+#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
Parser(const char *__restrict new_str, internal::ArgList &args)
- : str(new_str), args_start(args), args_cur(args) {}
+ : str(new_str), args_cur(args), args_start(args) {
+ inline_memset(reinterpret_cast<char *>(desc_arr), 0,
+ DESC_ARR_LEN * sizeof(TypeDesc));
+ }
+#else
+ Parser(const char *__restrict new_str, internal::ArgList &args)
+ : str(new_str), args_cur(args) {}
+#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
// get_next_section will parse the format string until it has a fully
// specified format section. This can either be a raw format section with no
@@ -53,9 +88,68 @@ class Parser {
// get_next_arg_value gets the next value from the arg list as type T.
template <class T> T inline get_next_arg_value() {
- ++args_index;
return args_cur.next_var<T>();
}
+
+ //----------------------------------------------------
+ // INDEX MODE ONLY FUNCTIONS AFTER HERE:
+ //----------------------------------------------------
+
+#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
+
+ // parse_index parses the index of a value inside a format string. It
+ // assumes that str[*local_pos] points to character after a '%' or '*', and
+ // returns 0 if there is no closing $, or if it finds no number. If it finds a
+ // number, it will move local_pos past the end of the $, else it will not move
+ // local_pos.
+ size_t parse_index(size_t *local_pos);
+
+ template <typename T>
+ static constexpr TypeDesc TYPE_DESC{sizeof(T), PrimaryType::Integer};
+ template <>
+ static constexpr TypeDesc TYPE_DESC<double>{sizeof(double),
+ PrimaryType::Float};
+ template <>
+ static constexpr TypeDesc TYPE_DESC<long double>{sizeof(long double),
+ PrimaryType::Float};
+ template <>
+ static constexpr TypeDesc TYPE_DESC<void *>{sizeof(void *),
+ PrimaryType::Pointer};
+ template <>
+ static constexpr TypeDesc TYPE_DESC<void>{0, PrimaryType::Integer};
+
+ void inline set_type_desc(size_t index, TypeDesc value) {
+ if (index != 0 && index <= DESC_ARR_LEN)
+ desc_arr[index - 1] = value;
+ }
+
+ // get_arg_value gets the value from the arg list at index (starting at 1).
+ // This may require parsing the format string. An index of 0 is interpreted as
+ // the next value.
+ template <class T> T inline get_arg_value(size_t index) {
+ if (!(index == 0 || index == args_index))
+ args_to_index(index);
+
+ set_type_desc(index, TYPE_DESC<T>);
+
+ ++args_index;
+ return get_next_arg_value<T>();
+ }
+
+ // the ArgList can only return the next item in the list. This function is
+ // used in index mode when the item that needs to be read is not the next one.
+ // It moves cur_args to the index requested so the the appropriate value may
+ // be read. This may involve parsing the format string, and is in the worst
+ // case an O(n^2) operation.
+ void args_to_index(size_t index);
+
+ // get_type_desc assumes that this format string uses index mode. It iterates
+ // through the format string until it finds a format specifier that defines
+ // the type of index, and returns a TypeDesc describing that type. It does not
+ // modify cur_pos.
+ TypeDesc get_type_desc(size_t index);
+
+#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
};
} // namespace printf_core
diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp
index 73c40eab94a3d..e149acb6658d8 100644
--- a/libc/test/src/stdio/printf_core/parser_test.cpp
+++ b/libc/test/src/stdio/printf_core/parser_test.cpp
@@ -260,3 +260,199 @@ TEST(LlvmLibcPrintfParserTest, EvalThreeArgs) {
ASSERT_FORMAT_EQ(expected2, format_arr[2]);
}
+
+#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
+
+TEST(LlvmLibcPrintfParserTest, IndexModeOneArg) {
+ __llvm_libc::printf_core::FormatSection format_arr[10];
+ const char *str = "%1$d";
+ int arg1 = 12345;
+ evaluate(format_arr, str, arg1);
+
+ __llvm_libc::printf_core::FormatSection expected;
+ expected.has_conv = true;
+ expected.raw_len = 4;
+ expected.raw_string = str;
+ expected.conv_val_raw = arg1;
+ expected.conv_name = 'd';
+
+ ASSERT_FORMAT_EQ(expected, format_arr[0]);
+}
+
+TEST(LlvmLibcPrintfParserTest, IndexModeThreeArgsSequential) {
+ __llvm_libc::printf_core::FormatSection format_arr[10];
+ const char *str = "%1$d%2$f%3$s";
+ int arg1 = 12345;
+ double arg2 = 123.45;
+ const char *arg3 = "12345";
+ evaluate(format_arr, str, arg1, arg2, arg3);
+
+ __llvm_libc::printf_core::FormatSection expected0, expected1, expected2;
+ expected0.has_conv = true;
+ expected0.raw_len = 4;
+ expected0.raw_string = str;
+ expected0.conv_val_raw = arg1;
+ expected0.conv_name = 'd';
+
+ ASSERT_FORMAT_EQ(expected0, format_arr[0]);
+
+ expected1.has_conv = true;
+ expected1.raw_len = 4;
+ expected1.raw_string = str + 4;
+ expected1.conv_val_raw = __llvm_libc::bit_cast<uint64_t>(arg2);
+ expected1.conv_name = 'f';
+
+ ASSERT_FORMAT_EQ(expected1, format_arr[1]);
+
+ expected2.has_conv = true;
+ expected2.raw_len = 4;
+ expected2.raw_string = str + 8;
+ expected2.conv_val_ptr = const_cast<char *>(arg3);
+ expected2.conv_name = 's';
+
+ ASSERT_FORMAT_EQ(expected2, format_arr[2]);
+}
+
+TEST(LlvmLibcPrintfParserTest, IndexModeThreeArgsReverse) {
+ __llvm_libc::printf_core::FormatSection format_arr[10];
+ const char *str = "%3$d%2$f%1$s";
+ int arg1 = 12345;
+ double arg2 = 123.45;
+ const char *arg3 = "12345";
+ evaluate(format_arr, str, arg3, arg2, arg1);
+
+ __llvm_libc::printf_core::FormatSection expected0, expected1, expected2;
+ expected0.has_conv = true;
+ expected0.raw_len = 4;
+ expected0.raw_string = str;
+ expected0.conv_val_raw = arg1;
+ expected0.conv_name = 'd';
+
+ ASSERT_FORMAT_EQ(expected0, format_arr[0]);
+
+ expected1.has_conv = true;
+ expected1.raw_len = 4;
+ expected1.raw_string = str + 4;
+ expected1.conv_val_raw = __llvm_libc::bit_cast<uint64_t>(arg2);
+ expected1.conv_name = 'f';
+
+ ASSERT_FORMAT_EQ(expected1, format_arr[1]);
+
+ expected2.has_conv = true;
+ expected2.raw_len = 4;
+ expected2.raw_string = str + 8;
+ expected2.conv_val_ptr = const_cast<char *>(arg3);
+ expected2.conv_name = 's';
+
+ ASSERT_FORMAT_EQ(expected2, format_arr[2]);
+}
+
+TEST(LlvmLibcPrintfParserTest, IndexModeTenArgsRandom) {
+ __llvm_libc::printf_core::FormatSection format_arr[10];
+ const char *str = "%6$d%3$d%7$d%2$d%8$d%1$d%4$d%9$d%5$d%10$d";
+ int args[10] = {6, 4, 2, 7, 9, 1, 3, 5, 8, 10};
+ evaluate(format_arr, str, args[0], args[1], args[2], args[3], args[4],
+ args[5], args[6], args[7], args[8], args[9]);
+
+ for (size_t i = 0; i < 10; ++i) {
+ __llvm_libc::printf_core::FormatSection expected;
+ expected.has_conv = true;
+ expected.raw_len = 4 + (i >= 9 ? 1 : 0);
+ expected.raw_string = str + (4 * i);
+ expected.conv_val_raw = i + 1;
+ expected.conv_name = 'd';
+ EXPECT_FORMAT_EQ(expected, format_arr[i]);
+ }
+}
+
+TEST(LlvmLibcPrintfParserTest, IndexModeComplexParsing) {
+ __llvm_libc::printf_core::FormatSection format_arr[10];
+ const char *str = "normal text %3$llu %% %2$ *4$f %2$ .*4$f %1$1.1c";
+ char arg1 = '1';
+ double arg2 = 123.45;
+ unsigned long long arg3 = 12345;
+ int arg4 = 10;
+ evaluate(format_arr, str, arg1, arg2, arg3, arg4);
+
+ __llvm_libc::printf_core::FormatSection expected0, expected1, expected2,
+ expected3, expected4, expected5, expected6, expected7, expected8,
+ expected9;
+
+ expected0.has_conv = false;
+ expected0.raw_len = 12;
+ expected0.raw_string = str;
+
+ EXPECT_FORMAT_EQ(expected0, format_arr[0]);
+
+ expected1.has_conv = true;
+ expected1.raw_len = 6;
+ expected1.raw_string = str + 12;
+ expected1.length_modifier = __llvm_libc::printf_core::LengthModifier::ll;
+ expected1.conv_val_raw = arg3;
+ expected1.conv_name = 'u';
+
+ EXPECT_FORMAT_EQ(expected1, format_arr[1]);
+
+ expected2.has_conv = false;
+ expected2.raw_len = 1;
+ expected2.raw_string = str + 18;
+
+ EXPECT_FORMAT_EQ(expected2, format_arr[2]);
+
+ expected3.has_conv = true;
+ expected3.raw_len = 2;
+ expected3.raw_string = str + 19;
+ expected3.conv_name = '%';
+
+ EXPECT_FORMAT_EQ(expected3, format_arr[3]);
+
+ expected4.has_conv = false;
+ expected4.raw_len = 1;
+ expected4.raw_string = str + 21;
+
+ EXPECT_FORMAT_EQ(expected4, format_arr[4]);
+
+ expected5.has_conv = true;
+ expected5.raw_len = 8;
+ expected5.raw_string = str + 22;
+ expected5.flags = __llvm_libc::printf_core::FormatFlags::SPACE_PREFIX;
+ expected5.min_width = arg4;
+ expected5.conv_val_raw = __llvm_libc::bit_cast<uint64_t>(arg2);
+ expected5.conv_name = 'f';
+
+ EXPECT_FORMAT_EQ(expected5, format_arr[5]);
+
+ expected6.has_conv = false;
+ expected6.raw_len = 1;
+ expected6.raw_string = str + 30;
+
+ EXPECT_FORMAT_EQ(expected6, format_arr[6]);
+
+ expected7.has_conv = true;
+ expected7.raw_len = 9;
+ expected7.raw_string = str + 31;
+ expected7.flags = __llvm_libc::printf_core::FormatFlags::SPACE_PREFIX;
+ expected7.precision = arg4;
+ expected7.conv_val_raw = __llvm_libc::bit_cast<uint64_t>(arg2);
+ expected7.conv_name = 'f';
+
+ EXPECT_FORMAT_EQ(expected7, format_arr[7]);
+
+ expected8.has_conv = false;
+ expected8.raw_len = 1;
+ expected8.raw_string = str + 40;
+
+ EXPECT_FORMAT_EQ(expected8, format_arr[8]);
+
+ expected9.has_conv = true;
+ expected9.raw_len = 7;
+ expected9.raw_string = str + 41;
+ expected9.min_width = 1;
+ expected9.precision = 1;
+ expected9.conv_val_raw = arg1;
+ expected9.conv_name = 'c';
+
+ EXPECT_FORMAT_EQ(expected9, format_arr[9]);
+}
+
+#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
More information about the libc-commits
mailing list