[libc-commits] [libc] Add bit width length modifier to printf (PR #82461)
Om Prakaash via libc-commits
libc-commits at lists.llvm.org
Thu Mar 28 15:23:54 PDT 2024
https://github.com/omprakaash updated https://github.com/llvm/llvm-project/pull/82461
>From 3d16955a3085c13cac89ada7e4f1a7494385e7c0 Mon Sep 17 00:00:00 2001
From: om <omsuseela at gmail.com>
Date: Mon, 19 Feb 2024 15:49:34 -0800
Subject: [PATCH] Add bit width length modifier to printf
---
libc/docs/dev/printf_behavior.rst | 4 +
libc/src/stdio/printf_core/converter_utils.h | 16 +++-
libc/src/stdio/printf_core/core_structs.h | 9 +-
libc/src/stdio/printf_core/int_converter.h | 5 +-
libc/src/stdio/printf_core/parser.h | 69 ++++++++++++---
.../stdio/printf_core/write_int_converter.h | 2 +
libc/test/UnitTest/PrintfMatcher.cpp | 6 ++
.../src/stdio/printf_core/parser_test.cpp | 36 ++++++++
libc/test/src/stdio/sprintf_test.cpp | 86 +++++++++++++++++++
9 files changed, 215 insertions(+), 18 deletions(-)
diff --git a/libc/docs/dev/printf_behavior.rst b/libc/docs/dev/printf_behavior.rst
index bc60aa43ee2b6b..d0f7409336d436 100644
--- a/libc/docs/dev/printf_behavior.rst
+++ b/libc/docs/dev/printf_behavior.rst
@@ -164,6 +164,10 @@ If a number passed as a min width or precision value is out of range for an int,
then it will be treated as the largest or smallest value in the int range
(e.g. "%-999999999999.999999999999s" is the same as "%-2147483648.2147483647s").
+If a number passed as a bit width is less than or equal to zero, the conversion
+is considered invalid. If the provided bit width is larger than the width of
+uintmax_t, it will be clamped to the width of uintmax_t.
+
----------
Conversion
----------
diff --git a/libc/src/stdio/printf_core/converter_utils.h b/libc/src/stdio/printf_core/converter_utils.h
index 54f0a870d0ac4a..a4bd3e667def35 100644
--- a/libc/src/stdio/printf_core/converter_utils.h
+++ b/libc/src/stdio/printf_core/converter_utils.h
@@ -18,7 +18,9 @@
namespace LIBC_NAMESPACE {
namespace printf_core {
-LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
+LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num,
+ LengthSpec length_spec) {
+ auto [lm, bw] = length_spec;
switch (lm) {
case LengthModifier::none:
return num & cpp::numeric_limits<unsigned int>::max();
@@ -40,6 +42,18 @@ LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
return num & cpp::numeric_limits<uintptr_t>::max();
case LengthModifier::j:
return num; // j is intmax, so no mask is necessary.
+ case LengthModifier::w:
+ case LengthModifier::wf: {
+ uintmax_t mask;
+ if (bw == 0) {
+ mask = 0;
+ } else if (bw < sizeof(uintmax_t) * CHAR_BIT) {
+ mask = (static_cast<uintmax_t>(1) << bw) - 1;
+ } else {
+ mask = UINTMAX_MAX;
+ }
+ return num & mask;
+ }
}
__builtin_unreachable();
}
diff --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h
index 7634d45568ab84..cd8bceb900de74 100644
--- a/libc/src/stdio/printf_core/core_structs.h
+++ b/libc/src/stdio/printf_core/core_structs.h
@@ -20,7 +20,12 @@ namespace printf_core {
// These length modifiers match the length modifiers in the format string, which
// is why they are formatted differently from the rest of the file.
-enum class LengthModifier { hh, h, l, ll, j, z, t, L, none };
+enum class LengthModifier { hh, h, l, ll, j, z, t, L, w, wf, none };
+
+struct LengthSpec {
+ LengthModifier lm;
+ size_t bit_width;
+};
enum FormatFlags : uint8_t {
LEFT_JUSTIFIED = 0x01, // -
@@ -42,6 +47,7 @@ struct FormatSection {
// Format Specifier Values
FormatFlags flags = FormatFlags(0);
LengthModifier length_modifier = LengthModifier::none;
+ size_t bit_width = 0;
int min_width = 0;
int precision = -1;
@@ -64,6 +70,7 @@ struct FormatSection {
if (!((static_cast<uint8_t>(flags) ==
static_cast<uint8_t>(other.flags)) &&
(min_width == other.min_width) && (precision == other.precision) &&
+ (bit_width == other.bit_width) &&
(length_modifier == other.length_modifier) &&
(conv_name == other.conv_name)))
return false;
diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
index 2efbf53d409382..496e7bd1a56d94 100644
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -71,7 +71,6 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
uintmax_t num = static_cast<uintmax_t>(to_conv.conv_val_raw);
bool is_negative = false;
FormatFlags flags = to_conv.flags;
-
const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';
// If the conversion is signed, then handle negative values.
@@ -89,8 +88,8 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
}
- num = apply_length_modifier(num, to_conv.length_modifier);
-
+ num =
+ apply_length_modifier(num, {to_conv.length_modifier, to_conv.bit_width});
cpp::array<char, details::num_buf_size()> buf;
auto str = details::num_to_strview(num, buf, to_conv.conv_name);
if (!str)
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index 1e7d2e58c924a6..fb5eb91ce02ea7 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -136,10 +136,10 @@ template <typename ArgProvider> class Parser {
}
}
- LengthModifier lm = parse_length_modifier(&cur_pos);
-
+ auto [lm, bw] = parse_length_modifier(&cur_pos);
section.length_modifier = lm;
section.conv_name = str[cur_pos];
+ section.bit_width = bw;
switch (str[cur_pos]) {
case ('%'):
// Regardless of options, a % conversion is always safe. The standard
@@ -188,6 +188,21 @@ template <typename ArgProvider> class Parser {
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, ptrdiff_t, conv_index);
break;
+
+ case (LengthModifier::w):
+ case (LengthModifier::wf):
+ if (bw == 0) {
+ section.has_conv = false;
+ } else if (bw <= INT_WIDTH) {
+ WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
+ } else if (bw <= LONG_WIDTH) {
+ WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long, conv_index);
+ } else if (bw <= LLONG_WIDTH) {
+ WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index);
+ } else {
+ WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index);
+ }
+ break;
}
break;
#ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
@@ -273,38 +288,54 @@ template <typename ArgProvider> class Parser {
// assumes that str[*local_pos] is inside a format specifier. It returns a
// LengthModifier with the length modifier it found. It will advance local_pos
// after the format specifier if one is found.
- LIBC_INLINE LengthModifier parse_length_modifier(size_t *local_pos) {
+ LIBC_INLINE LengthSpec parse_length_modifier(size_t *local_pos) {
switch (str[*local_pos]) {
case ('l'):
if (str[*local_pos + 1] == 'l') {
*local_pos += 2;
- return LengthModifier::ll;
+ return {LengthModifier::ll, 0};
} else {
++*local_pos;
- return LengthModifier::l;
+ return {LengthModifier::l, 0};
}
+ case ('w'): {
+ LengthModifier lm;
+ if (str[*local_pos + 1] == 'f') {
+ *local_pos += 2;
+ lm = LengthModifier::wf;
+ } else {
+ ++*local_pos;
+ lm = LengthModifier::w;
+ }
+ if (internal::isdigit(str[*local_pos])) {
+ const auto result = internal::strtointeger<int>(str + *local_pos, 10);
+ *local_pos += result.parsed_len;
+ return {lm, static_cast<size_t>(cpp::max(0, result.value))};
+ }
+ return {lm, 0};
+ }
case ('h'):
if (str[*local_pos + 1] == 'h') {
*local_pos += 2;
- return LengthModifier::hh;
+ return {LengthModifier::hh, 0};
} else {
++*local_pos;
- return LengthModifier::h;
+ return {LengthModifier::h, 0};
}
case ('L'):
++*local_pos;
- return LengthModifier::L;
+ return {LengthModifier::L, 0};
case ('j'):
++*local_pos;
- return LengthModifier::j;
+ return {LengthModifier::j, 0};
case ('z'):
++*local_pos;
- return LengthModifier::z;
+ return {LengthModifier::z, 0};
case ('t'):
++*local_pos;
- return LengthModifier::t;
+ return {LengthModifier::t, 0};
default:
- return LengthModifier::none;
+ return {LengthModifier::none, 0};
}
}
@@ -460,7 +491,7 @@ template <typename ArgProvider> class Parser {
}
}
- LengthModifier lm = parse_length_modifier(&local_pos);
+ auto [lm, bw] = parse_length_modifier(&local_pos);
// if we don't have an index for this conversion, then its position is
// unknown and all this information is irrelevant. The rest of this
@@ -511,6 +542,18 @@ template <typename ArgProvider> class Parser {
case (LengthModifier::t):
conv_size = type_desc_from_type<ptrdiff_t>();
break;
+ case (LengthModifier::w):
+ case (LengthModifier::wf):
+ if (bw <= INT_WIDTH) {
+ conv_size = type_desc_from_type<int>();
+ } else if (bw <= LONG_WIDTH) {
+ conv_size = type_desc_from_type<long>();
+ } else if (bw <= LLONG_WIDTH) {
+ conv_size = type_desc_from_type<long long>();
+ } else {
+ conv_size = type_desc_from_type<intmax_t>();
+ }
+ break;
}
break;
#ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
diff --git a/libc/src/stdio/printf_core/write_int_converter.h b/libc/src/stdio/printf_core/write_int_converter.h
index 0310905f36f146..18aa5c79897ec4 100644
--- a/libc/src/stdio/printf_core/write_int_converter.h
+++ b/libc/src/stdio/printf_core/write_int_converter.h
@@ -55,6 +55,8 @@ LIBC_INLINE int convert_write_int(Writer *writer,
*reinterpret_cast<ptrdiff_t *>(to_conv.conv_val_ptr) = written;
break;
case LengthModifier::j:
+ case LengthModifier::w:
+ case LengthModifier::wf:
*reinterpret_cast<uintmax_t *>(to_conv.conv_val_ptr) = written;
break;
}
diff --git a/libc/test/UnitTest/PrintfMatcher.cpp b/libc/test/UnitTest/PrintfMatcher.cpp
index 32f3be73307e3a..c8303815c92296 100644
--- a/libc/test/UnitTest/PrintfMatcher.cpp
+++ b/libc/test/UnitTest/PrintfMatcher.cpp
@@ -39,6 +39,10 @@ namespace {
case (LengthModifier::lm): \
tlog << #lm; \
break
+#define CASE_LM_BIT_WIDTH(lm, bw) \
+ case (LengthModifier::lm): \
+ tlog << #lm << "\n\tbit width: :" << bw; \
+ break
static void display(FormatSection form) {
tlog << "Raw String (len " << form.raw_string.size() << "): \"";
@@ -67,6 +71,8 @@ static void display(FormatSection form) {
CASE_LM(z);
CASE_LM(t);
CASE_LM(L);
+ CASE_LM_BIT_WIDTH(w, form.bit_width);
+ CASE_LM_BIT_WIDTH(wf, form.bit_width);
}
tlog << "\n";
tlog << "\tconversion name: " << form.conv_name << "\n";
diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp
index 0134277c4a1b2d..66d6dd0a86c422 100644
--- a/libc/test/src/stdio/printf_core/parser_test.cpp
+++ b/libc/test/src/stdio/printf_core/parser_test.cpp
@@ -223,6 +223,42 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) {
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}
+TEST(LlvmLibcPrintfParserTest, EvalOneArgWithBitWidthLengthModifier) {
+ LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
+ const char *str = "%w32d";
+ long long arg1 = 12345;
+ evaluate(format_arr, str, arg1);
+
+ LIBC_NAMESPACE::printf_core::FormatSection expected;
+ expected.has_conv = true;
+
+ expected.raw_string = {str, 5};
+ expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::w;
+ expected.bit_width = 32;
+ expected.conv_val_raw = arg1;
+ expected.conv_name = 'd';
+
+ ASSERT_PFORMAT_EQ(expected, format_arr[0]);
+}
+
+TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFastBitWidthLengthModifier) {
+ LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
+ const char *str = "%wf32d";
+ long long arg1 = 12345;
+ evaluate(format_arr, str, arg1);
+
+ LIBC_NAMESPACE::printf_core::FormatSection expected;
+ expected.has_conv = true;
+
+ expected.raw_string = {str, 6};
+ expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::wf;
+ expected.bit_width = 32;
+ expected.conv_val_raw = arg1;
+ expected.conv_name = 'd';
+
+ ASSERT_PFORMAT_EQ(expected, format_arr[0]);
+}
+
TEST(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) {
LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
const char *str = "% -056.78jd";
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 186b37e2898af6..d5c10a26be6b3c 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -169,6 +169,92 @@ TEST(LlvmLibcSPrintfTest, IntConv) {
EXPECT_EQ(written, 20);
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+ written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 5807);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "7");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 1);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "1");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%w64d", 9223372036854775807l);
+ EXPECT_EQ(written, 19);
+ ASSERT_STREQ(buff, "9223372036854775807");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%w-1d", 5807);
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ(buff, "%w-1d");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%w0d", 5807);
+ EXPECT_EQ(written, 4);
+ ASSERT_STREQ(buff, "%w0d");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%w999d", 9223372036854775807l);
+ EXPECT_EQ(written, 19);
+ ASSERT_STREQ(buff, "9223372036854775807");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%winvalid%w1d", 5807, 5807);
+ EXPECT_EQ(written, 10);
+ ASSERT_STREQ(buff, "%winvalid1");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%w-1d%w1d", 5807, 5807);
+ EXPECT_EQ(written, 6);
+ ASSERT_STREQ(buff, "%w-1d1");
+
+ char format[64];
+ char uintmax[128];
+ LIBC_NAMESPACE::sprintf(format, "%%w%du", UINTMAX_WIDTH);
+ const int uintmax_len = LIBC_NAMESPACE::sprintf(uintmax, "%ju", UINTMAX_MAX);
+ written = LIBC_NAMESPACE::sprintf(buff, format, UINTMAX_MAX);
+ EXPECT_EQ(written, uintmax_len);
+ ASSERT_STREQ(buff, uintmax);
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%w64u", 18446744073709551615ull);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "18446744073709551615"); // ull max
+
+ written =
+ LIBC_NAMESPACE::sprintf(buff, "%w64d", -9223372036854775807ll - 1ll);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 5807);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "7");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 1);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "1");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf64u", 18446744073709551615ull);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "18446744073709551615"); // ull max
+
+ written =
+ LIBC_NAMESPACE::sprintf(buff, "%wf64d", -9223372036854775807ll - 1ll);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf0d", 5807);
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ(buff, "%wf0d");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf-1d", 5807);
+ EXPECT_EQ(written, 6);
+ ASSERT_STREQ(buff, "%wf-1d");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wfinvalid%wf1d", 5807, 5807);
+ EXPECT_EQ(written, 11);
+ ASSERT_STREQ(buff, "%wfinvalid1");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf-1d%wf1d", 5807, 5807);
+ EXPECT_EQ(written, 7);
+ ASSERT_STREQ(buff, "%wf-1d1");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf999d", 5807);
+ EXPECT_EQ(written, 4);
+ ASSERT_STREQ(buff, "5807");
+
// Min Width Tests.
written = LIBC_NAMESPACE::sprintf(buff, "%4d", 789);
More information about the libc-commits
mailing list