[libc-commits] [libc] Add bit width length modifier to printf (PR #82461)
Om Prakaash via libc-commits
libc-commits at lists.llvm.org
Wed Feb 28 07:50:22 PST 2024
https://github.com/omprakaash updated https://github.com/llvm/llvm-project/pull/82461
>From d026192d707b5c959a93fc51e810299d08d3a736 Mon Sep 17 00:00:00 2001
From: om <omsuseela at gmail.com>
Date: Mon, 19 Feb 2024 15:49:34 -0800
Subject: [PATCH] Add bit width length modifier to printf
---
libc/src/stdio/printf_core/converter_utils.h | 16 ++++-
libc/src/stdio/printf_core/core_structs.h | 9 ++-
libc/src/stdio/printf_core/int_converter.h | 5 +-
libc/src/stdio/printf_core/parser.h | 64 +++++++++++++++----
.../stdio/printf_core/write_int_converter.h | 2 +
libc/test/UnitTest/PrintfMatcher.cpp | 6 ++
.../src/stdio/printf_core/parser_test.cpp | 36 +++++++++++
libc/test/src/stdio/sprintf_test.cpp | 47 ++++++++++++++
8 files changed, 167 insertions(+), 18 deletions(-)
diff --git a/libc/src/stdio/printf_core/converter_utils.h b/libc/src/stdio/printf_core/converter_utils.h
index 54f0a870d0ac4a..a4bd3e667def35 100644
--- a/libc/src/stdio/printf_core/converter_utils.h
+++ b/libc/src/stdio/printf_core/converter_utils.h
@@ -18,7 +18,9 @@
namespace LIBC_NAMESPACE {
namespace printf_core {
-LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
+LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num,
+ LengthSpec length_spec) {
+ auto [lm, bw] = length_spec;
switch (lm) {
case LengthModifier::none:
return num & cpp::numeric_limits<unsigned int>::max();
@@ -40,6 +42,18 @@ LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
return num & cpp::numeric_limits<uintptr_t>::max();
case LengthModifier::j:
return num; // j is intmax, so no mask is necessary.
+ case LengthModifier::w:
+ case LengthModifier::wf: {
+ uintmax_t mask;
+ if (bw == 0) {
+ mask = 0;
+ } else if (bw < sizeof(uintmax_t) * CHAR_BIT) {
+ mask = (static_cast<uintmax_t>(1) << bw) - 1;
+ } else {
+ mask = UINTMAX_MAX;
+ }
+ return num & mask;
+ }
}
__builtin_unreachable();
}
diff --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h
index 7634d45568ab84..cd8bceb900de74 100644
--- a/libc/src/stdio/printf_core/core_structs.h
+++ b/libc/src/stdio/printf_core/core_structs.h
@@ -20,7 +20,12 @@ namespace printf_core {
// These length modifiers match the length modifiers in the format string, which
// is why they are formatted differently from the rest of the file.
-enum class LengthModifier { hh, h, l, ll, j, z, t, L, none };
+enum class LengthModifier { hh, h, l, ll, j, z, t, L, w, wf, none };
+
+struct LengthSpec {
+ LengthModifier lm;
+ size_t bit_width;
+};
enum FormatFlags : uint8_t {
LEFT_JUSTIFIED = 0x01, // -
@@ -42,6 +47,7 @@ struct FormatSection {
// Format Specifier Values
FormatFlags flags = FormatFlags(0);
LengthModifier length_modifier = LengthModifier::none;
+ size_t bit_width = 0;
int min_width = 0;
int precision = -1;
@@ -64,6 +70,7 @@ struct FormatSection {
if (!((static_cast<uint8_t>(flags) ==
static_cast<uint8_t>(other.flags)) &&
(min_width == other.min_width) && (precision == other.precision) &&
+ (bit_width == other.bit_width) &&
(length_modifier == other.length_modifier) &&
(conv_name == other.conv_name)))
return false;
diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
index 2efbf53d409382..496e7bd1a56d94 100644
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -71,7 +71,6 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
uintmax_t num = static_cast<uintmax_t>(to_conv.conv_val_raw);
bool is_negative = false;
FormatFlags flags = to_conv.flags;
-
const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';
// If the conversion is signed, then handle negative values.
@@ -89,8 +88,8 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
}
- num = apply_length_modifier(num, to_conv.length_modifier);
-
+ num =
+ apply_length_modifier(num, {to_conv.length_modifier, to_conv.bit_width});
cpp::array<char, details::num_buf_size()> buf;
auto str = details::num_to_strview(num, buf, to_conv.conv_name);
if (!str)
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index 1e7d2e58c924a6..45690191be306b 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -136,10 +136,10 @@ template <typename ArgProvider> class Parser {
}
}
- LengthModifier lm = parse_length_modifier(&cur_pos);
-
+ auto [lm, bw] = parse_length_modifier(&cur_pos);
section.length_modifier = lm;
section.conv_name = str[cur_pos];
+ section.bit_width = bw;
switch (str[cur_pos]) {
case ('%'):
// Regardless of options, a % conversion is always safe. The standard
@@ -188,6 +188,19 @@ template <typename ArgProvider> class Parser {
WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, ptrdiff_t, conv_index);
break;
+
+ case (LengthModifier::w):
+ case (LengthModifier::wf):
+ if (bw <= INT_WIDTH) {
+ WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
+ } else if (bw <= LONG_WIDTH) {
+ WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long, conv_index);
+ } else if (bw <= LLONG_WIDTH) {
+ WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index);
+ } else {
+ WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index);
+ }
+ break;
}
break;
#ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
@@ -273,38 +286,51 @@ template <typename ArgProvider> class Parser {
// assumes that str[*local_pos] is inside a format specifier. It returns a
// LengthModifier with the length modifier it found. It will advance local_pos
// after the format specifier if one is found.
- LIBC_INLINE LengthModifier parse_length_modifier(size_t *local_pos) {
+ LIBC_INLINE LengthSpec parse_length_modifier(size_t *local_pos) {
switch (str[*local_pos]) {
case ('l'):
if (str[*local_pos + 1] == 'l') {
*local_pos += 2;
- return LengthModifier::ll;
+ return {LengthModifier::ll, 0};
+ } else {
+ ++*local_pos;
+ return {LengthModifier::l, 0};
+ }
+ case ('w'): {
+ LengthModifier lm;
+ if (str[*local_pos + 1] == 'f') {
+ *local_pos += 2;
+ lm = LengthModifier::wf;
} else {
++*local_pos;
- return LengthModifier::l;
+ lm = LengthModifier::w;
}
+ const auto result = internal::strtointeger<size_t>(str + *local_pos, 10);
+ *local_pos += result.parsed_len;
+ return {lm, result.value};
+ }
case ('h'):
if (str[*local_pos + 1] == 'h') {
*local_pos += 2;
- return LengthModifier::hh;
+ return {LengthModifier::hh, 0};
} else {
++*local_pos;
- return LengthModifier::h;
+ return {LengthModifier::h, 0};
}
case ('L'):
++*local_pos;
- return LengthModifier::L;
+ return {LengthModifier::L, 0};
case ('j'):
++*local_pos;
- return LengthModifier::j;
+ return {LengthModifier::j, 0};
case ('z'):
++*local_pos;
- return LengthModifier::z;
+ return {LengthModifier::z, 0};
case ('t'):
++*local_pos;
- return LengthModifier::t;
+ return {LengthModifier::t, 0};
default:
- return LengthModifier::none;
+ return {LengthModifier::none, 0};
}
}
@@ -460,7 +486,7 @@ template <typename ArgProvider> class Parser {
}
}
- LengthModifier lm = parse_length_modifier(&local_pos);
+ auto [lm, bw] = parse_length_modifier(&local_pos);
// if we don't have an index for this conversion, then its position is
// unknown and all this information is irrelevant. The rest of this
@@ -511,6 +537,18 @@ template <typename ArgProvider> class Parser {
case (LengthModifier::t):
conv_size = type_desc_from_type<ptrdiff_t>();
break;
+ case (LengthModifier::w):
+ case (LengthModifier::wf):
+ if (bw <= INT_WIDTH) {
+ conv_size = type_desc_from_type<int>();
+ } else if (bw <= LONG_WIDTH) {
+ conv_size = type_desc_from_type<long>();
+ } else if (bw <= LLONG_WIDTH) {
+ conv_size = type_desc_from_type<long long>();
+ } else {
+ conv_size = type_desc_from_type<intmax_t>();
+ }
+ break;
}
break;
#ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
diff --git a/libc/src/stdio/printf_core/write_int_converter.h b/libc/src/stdio/printf_core/write_int_converter.h
index 0310905f36f146..18aa5c79897ec4 100644
--- a/libc/src/stdio/printf_core/write_int_converter.h
+++ b/libc/src/stdio/printf_core/write_int_converter.h
@@ -55,6 +55,8 @@ LIBC_INLINE int convert_write_int(Writer *writer,
*reinterpret_cast<ptrdiff_t *>(to_conv.conv_val_ptr) = written;
break;
case LengthModifier::j:
+ case LengthModifier::w:
+ case LengthModifier::wf:
*reinterpret_cast<uintmax_t *>(to_conv.conv_val_ptr) = written;
break;
}
diff --git a/libc/test/UnitTest/PrintfMatcher.cpp b/libc/test/UnitTest/PrintfMatcher.cpp
index 32f3be73307e3a..c8303815c92296 100644
--- a/libc/test/UnitTest/PrintfMatcher.cpp
+++ b/libc/test/UnitTest/PrintfMatcher.cpp
@@ -39,6 +39,10 @@ namespace {
case (LengthModifier::lm): \
tlog << #lm; \
break
+#define CASE_LM_BIT_WIDTH(lm, bw) \
+ case (LengthModifier::lm): \
+ tlog << #lm << "\n\tbit width: :" << bw; \
+ break
static void display(FormatSection form) {
tlog << "Raw String (len " << form.raw_string.size() << "): \"";
@@ -67,6 +71,8 @@ static void display(FormatSection form) {
CASE_LM(z);
CASE_LM(t);
CASE_LM(L);
+ CASE_LM_BIT_WIDTH(w, form.bit_width);
+ CASE_LM_BIT_WIDTH(wf, form.bit_width);
}
tlog << "\n";
tlog << "\tconversion name: " << form.conv_name << "\n";
diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp
index 0134277c4a1b2d..66d6dd0a86c422 100644
--- a/libc/test/src/stdio/printf_core/parser_test.cpp
+++ b/libc/test/src/stdio/printf_core/parser_test.cpp
@@ -223,6 +223,42 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) {
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}
+TEST(LlvmLibcPrintfParserTest, EvalOneArgWithBitWidthLengthModifier) {
+ LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
+ const char *str = "%w32d";
+ long long arg1 = 12345;
+ evaluate(format_arr, str, arg1);
+
+ LIBC_NAMESPACE::printf_core::FormatSection expected;
+ expected.has_conv = true;
+
+ expected.raw_string = {str, 5};
+ expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::w;
+ expected.bit_width = 32;
+ expected.conv_val_raw = arg1;
+ expected.conv_name = 'd';
+
+ ASSERT_PFORMAT_EQ(expected, format_arr[0]);
+}
+
+TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFastBitWidthLengthModifier) {
+ LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
+ const char *str = "%wf32d";
+ long long arg1 = 12345;
+ evaluate(format_arr, str, arg1);
+
+ LIBC_NAMESPACE::printf_core::FormatSection expected;
+ expected.has_conv = true;
+
+ expected.raw_string = {str, 6};
+ expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::wf;
+ expected.bit_width = 32;
+ expected.conv_val_raw = arg1;
+ expected.conv_name = 'd';
+
+ ASSERT_PFORMAT_EQ(expected, format_arr[0]);
+}
+
TEST(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) {
LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
const char *str = "% -056.78jd";
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 186b37e2898af6..31ab9f4dfd1b04 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -12,6 +12,7 @@
#include "test/UnitTest/RoundingModeUtils.h"
#include "test/UnitTest/Test.h"
#include <inttypes.h>
+#include <src/string/strlen.h>
// TODO: Add a comment here explaining the printf format string.
@@ -169,6 +170,52 @@ TEST(LlvmLibcSPrintfTest, IntConv) {
EXPECT_EQ(written, 20);
ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+ written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 5807);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "7");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 1);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "1");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%w64d", 9223372036854775807l);
+ EXPECT_EQ(written, 19);
+ ASSERT_STREQ(buff, "9223372036854775807");
+
+ char format[64];
+ char uintmax[128];
+ LIBC_NAMESPACE::sprintf(format, "%%w%du", UINTMAX_WIDTH);
+ LIBC_NAMESPACE::sprintf(uintmax, "%ju", UINTMAX_MAX);
+ written = LIBC_NAMESPACE::sprintf(buff, format, UINTMAX_MAX);
+ EXPECT_EQ(written, static_cast<int>(strlen(uintmax)));
+ ASSERT_STREQ(buff, uintmax);
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%w64u", 18446744073709551615ull);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "18446744073709551615"); // ull max
+
+ written =
+ LIBC_NAMESPACE::sprintf(buff, "%w64d", -9223372036854775807ll - 1ll);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 5807);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "7");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 1);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "1");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%wf64u", 18446744073709551615ull);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "18446744073709551615"); // ull max
+
+ written =
+ LIBC_NAMESPACE::sprintf(buff, "%wf64d", -9223372036854775807ll - 1ll);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+
// Min Width Tests.
written = LIBC_NAMESPACE::sprintf(buff, "%4d", 789);
More information about the libc-commits
mailing list