[libc-commits] [libc] Add bit width length modifier to printf (PR #82461)
Om Prakaash via libc-commits
libc-commits at lists.llvm.org
Tue Feb 20 21:36:15 PST 2024
https://github.com/omprakaash created https://github.com/llvm/llvm-project/pull/82461
Resolves #81685. This adds support for wN and wfN length modifiers in fprintf.
>From 0ae4242bb299edbd1416767bf09ab588b49deee2 Mon Sep 17 00:00:00 2001
From: om <omsuseela at gmail.com>
Date: Mon, 19 Feb 2024 15:49:34 -0800
Subject: [PATCH] Add bit width length modifier to printf
---
libc/src/stdio/printf_core/converter_utils.h | 16 ++++++-
libc/src/stdio/printf_core/core_structs.h | 9 +++-
libc/src/stdio/printf_core/int_converter.h | 5 +--
libc/src/stdio/printf_core/parser.h | 43 +++++++++++++------
.../stdio/printf_core/write_int_converter.h | 2 +
libc/test/UnitTest/PrintfMatcher.cpp | 6 +++
.../src/stdio/printf_core/parser_test.cpp | 36 ++++++++++++++++
libc/test/src/stdio/sprintf_test.cpp | 34 +++++++++++++++
8 files changed, 133 insertions(+), 18 deletions(-)
diff --git a/libc/src/stdio/printf_core/converter_utils.h b/libc/src/stdio/printf_core/converter_utils.h
index 54f0a870d0ac4a..30eab6c4af8892 100644
--- a/libc/src/stdio/printf_core/converter_utils.h
+++ b/libc/src/stdio/printf_core/converter_utils.h
@@ -18,7 +18,9 @@
namespace LIBC_NAMESPACE {
namespace printf_core {
-LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
+LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num,
+ LengthSpec length_spec) {
+ auto [lm, bw] = length_spec;
switch (lm) {
case LengthModifier::none:
return num & cpp::numeric_limits<unsigned int>::max();
@@ -40,6 +42,18 @@ LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
return num & cpp::numeric_limits<uintptr_t>::max();
case LengthModifier::j:
return num; // j is intmax, so no mask is necessary.
+ case LengthModifier::w:
+ case LengthModifier::wf: {
+ uintmax_t mask;
+ if (bw == 0) {
+ mask = 0;
+ } else if (bw < sizeof(uintmax_t) * 8) {
+ mask = (static_cast<uintmax_t>(1) << bw) - 1;
+ } else {
+ mask = UINTMAX_MAX;
+ }
+ return num & mask;
+ }
}
__builtin_unreachable();
}
diff --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h
index 7634d45568ab84..cd8bceb900de74 100644
--- a/libc/src/stdio/printf_core/core_structs.h
+++ b/libc/src/stdio/printf_core/core_structs.h
@@ -20,7 +20,12 @@ namespace printf_core {
// These length modifiers match the length modifiers in the format string, which
// is why they are formatted differently from the rest of the file.
-enum class LengthModifier { hh, h, l, ll, j, z, t, L, none };
+enum class LengthModifier { hh, h, l, ll, j, z, t, L, w, wf, none };
+
+struct LengthSpec {
+ LengthModifier lm;
+ size_t bit_width;
+};
enum FormatFlags : uint8_t {
LEFT_JUSTIFIED = 0x01, // -
@@ -42,6 +47,7 @@ struct FormatSection {
// Format Specifier Values
FormatFlags flags = FormatFlags(0);
LengthModifier length_modifier = LengthModifier::none;
+ size_t bit_width = 0;
int min_width = 0;
int precision = -1;
@@ -64,6 +70,7 @@ struct FormatSection {
if (!((static_cast<uint8_t>(flags) ==
static_cast<uint8_t>(other.flags)) &&
(min_width == other.min_width) && (precision == other.precision) &&
+ (bit_width == other.bit_width) &&
(length_modifier == other.length_modifier) &&
(conv_name == other.conv_name)))
return false;
diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
index 2efbf53d409382..496e7bd1a56d94 100644
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -71,7 +71,6 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
uintmax_t num = static_cast<uintmax_t>(to_conv.conv_val_raw);
bool is_negative = false;
FormatFlags flags = to_conv.flags;
-
const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';
// If the conversion is signed, then handle negative values.
@@ -89,8 +88,8 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
}
- num = apply_length_modifier(num, to_conv.length_modifier);
-
+ num =
+ apply_length_modifier(num, {to_conv.length_modifier, to_conv.bit_width});
cpp::array<char, details::num_buf_size()> buf;
auto str = details::num_to_strview(num, buf, to_conv.conv_name);
if (!str)
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index 1e7d2e58c924a6..7a8a9b879dad84 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -136,10 +136,10 @@ template <typename ArgProvider> class Parser {
}
}
- LengthModifier lm = parse_length_modifier(&cur_pos);
-
+ auto [lm, bw] = parse_length_modifier(&cur_pos);
section.length_modifier = lm;
section.conv_name = str[cur_pos];
+ section.bit_width = bw;
switch (str[cur_pos]) {
case ('%'):
// Regardless of options, a % conversion is always safe. The standard
@@ -177,6 +177,8 @@ template <typename ArgProvider> class Parser {
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index);
break;
case (LengthModifier::j):
+ case (LengthModifier::w):
+ case (LengthModifier::wf):
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index);
break;
@@ -273,38 +275,51 @@ template <typename ArgProvider> class Parser {
// assumes that str[*local_pos] is inside a format specifier. It returns a
// LengthModifier with the length modifier it found. It will advance local_pos
// after the format specifier if one is found.
- LIBC_INLINE LengthModifier parse_length_modifier(size_t *local_pos) {
+ LIBC_INLINE LengthSpec parse_length_modifier(size_t *local_pos) {
switch (str[*local_pos]) {
case ('l'):
if (str[*local_pos + 1] == 'l') {
*local_pos += 2;
- return LengthModifier::ll;
+ return {LengthModifier::ll, 0};
+ } else {
+ ++*local_pos;
+ return {LengthModifier::l, 0};
+ }
+ case ('w'): {
+ LengthModifier lm;
+ if (str[*local_pos + 1] == 'f') {
+ *local_pos += 2;
+ lm = LengthModifier::wf;
} else {
++*local_pos;
- return LengthModifier::l;
+ lm = LengthModifier::w;
}
+ const auto result = internal::strtointeger<size_t>(str + *local_pos, 10);
+ *local_pos += result.parsed_len;
+ return {lm, result.value};
+ }
case ('h'):
if (str[*local_pos + 1] == 'h') {
*local_pos += 2;
- return LengthModifier::hh;
+ return {LengthModifier::hh, 0};
} else {
++*local_pos;
- return LengthModifier::h;
+ return {LengthModifier::h, 0};
}
case ('L'):
++*local_pos;
- return LengthModifier::L;
+ return {LengthModifier::L, 0};
case ('j'):
++*local_pos;
- return LengthModifier::j;
+ return {LengthModifier::j, 0};
case ('z'):
++*local_pos;
- return LengthModifier::z;
+ return {LengthModifier::z, 0};
case ('t'):
++*local_pos;
- return LengthModifier::t;
+ return {LengthModifier::t, 0};
default:
- return LengthModifier::none;
+ return {LengthModifier::none, 0};
}
}
@@ -460,7 +475,7 @@ template <typename ArgProvider> class Parser {
}
}
- LengthModifier lm = parse_length_modifier(&local_pos);
+ auto [lm, bw] = parse_length_modifier(&local_pos);
// if we don't have an index for this conversion, then its position is
// unknown and all this information is irrelevant. The rest of this
@@ -503,6 +518,8 @@ template <typename ArgProvider> class Parser {
conv_size = type_desc_from_type<long long>();
break;
case (LengthModifier::j):
+ case (LengthModifier::w):
+ case (LengthModifier::wf):
conv_size = type_desc_from_type<intmax_t>();
break;
case (LengthModifier::z):
diff --git a/libc/src/stdio/printf_core/write_int_converter.h b/libc/src/stdio/printf_core/write_int_converter.h
index 0310905f36f146..18aa5c79897ec4 100644
--- a/libc/src/stdio/printf_core/write_int_converter.h
+++ b/libc/src/stdio/printf_core/write_int_converter.h
@@ -55,6 +55,8 @@ LIBC_INLINE int convert_write_int(Writer *writer,
*reinterpret_cast<ptrdiff_t *>(to_conv.conv_val_ptr) = written;
break;
case LengthModifier::j:
+ case LengthModifier::w:
+ case LengthModifier::wf:
*reinterpret_cast<uintmax_t *>(to_conv.conv_val_ptr) = written;
break;
}
diff --git a/libc/test/UnitTest/PrintfMatcher.cpp b/libc/test/UnitTest/PrintfMatcher.cpp
index 32f3be73307e3a..c8303815c92296 100644
--- a/libc/test/UnitTest/PrintfMatcher.cpp
+++ b/libc/test/UnitTest/PrintfMatcher.cpp
@@ -39,6 +39,10 @@ namespace {
case (LengthModifier::lm): \
tlog << #lm; \
break
+#define CASE_LM_BIT_WIDTH(lm, bw) \
+ case (LengthModifier::lm): \
+ tlog << #lm << "\n\tbit width: :" << bw; \
+ break
static void display(FormatSection form) {
tlog << "Raw String (len " << form.raw_string.size() << "): \"";
@@ -67,6 +71,8 @@ static void display(FormatSection form) {
CASE_LM(z);
CASE_LM(t);
CASE_LM(L);
+ CASE_LM_BIT_WIDTH(w, form.bit_width);
+ CASE_LM_BIT_WIDTH(wf, form.bit_width);
}
tlog << "\n";
tlog << "\tconversion name: " << form.conv_name << "\n";
diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp
index 0134277c4a1b2d..66d6dd0a86c422 100644
--- a/libc/test/src/stdio/printf_core/parser_test.cpp
+++ b/libc/test/src/stdio/printf_core/parser_test.cpp
@@ -223,6 +223,42 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) {
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}
+TEST(LlvmLibcPrintfParserTest, EvalOneArgWithBitWidthLengthModifier) {
+ LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
+ const char *str = "%w32d";
+ long long arg1 = 12345;
+ evaluate(format_arr, str, arg1);
+
+ LIBC_NAMESPACE::printf_core::FormatSection expected;
+ expected.has_conv = true;
+
+ expected.raw_string = {str, 5};
+ expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::w;
+ expected.bit_width = 32;
+ expected.conv_val_raw = arg1;
+ expected.conv_name = 'd';
+
+ ASSERT_PFORMAT_EQ(expected, format_arr[0]);
+}
+
+TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFastBitWidthLengthModifier) {
+ LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
+ const char *str = "%wf32d";
+ long long arg1 = 12345;
+ evaluate(format_arr, str, arg1);
+
+ LIBC_NAMESPACE::printf_core::FormatSection expected;
+ expected.has_conv = true;
+
+ expected.raw_string = {str, 6};
+ expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::wf;
+ expected.bit_width = 32;
+ expected.conv_val_raw = arg1;
+ expected.conv_name = 'd';
+
+ ASSERT_PFORMAT_EQ(expected, format_arr[0]);
+}
+
TEST(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) {
LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
const char *str = "% -056.78jd";
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 186b37e2898af6..8d61dc3d5dd686 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -169,6 +169,40 @@ TEST(LlvmLibcSPrintfTest, IntConv) {
EXPECT_EQ(written, 20);
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+ written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 5807);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "7");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 1);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "1");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%w64u", 18446744073709551615ull);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "18446744073709551615"); // ull max
+
+ written =
+ LIBC_NAMESPACE::sprintf(buff, "%w64d", -9223372036854775807ll - 1ll);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 5807);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "7");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 1);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "1");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf64u", 18446744073709551615ull);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "18446744073709551615"); // ull max
+
+ written =
+ LIBC_NAMESPACE::sprintf(buff, "%wf64d", -9223372036854775807ll - 1ll);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+
// Min Width Tests.
written = LIBC_NAMESPACE::sprintf(buff, "%4d", 789);
More information about the libc-commits
mailing list