[libc-commits] [libc] Add bit width length modifier to printf (PR #82461)

Tue Feb 20 21:36:15 PST 2024

https://github.com/omprakaash created https://github.com/llvm/llvm-project/pull/82461

Resolves #81685. This adds support for wN and wfN length modifiers in fprintf.

>From 0ae4242bb299edbd1416767bf09ab588b49deee2 Mon Sep 17 00:00:00 2001
From: om <omsuseela at gmail.com>
Date: Mon, 19 Feb 2024 15:49:34 -0800
Subject: [PATCH] Add bit width length modifier to printf

---
 libc/src/stdio/printf_core/converter_utils.h  | 16 ++++++-
 libc/src/stdio/printf_core/core_structs.h     |  9 +++-
 libc/src/stdio/printf_core/int_converter.h    |  5 +--
 libc/src/stdio/printf_core/parser.h           | 43 +++++++++++++------
 .../stdio/printf_core/write_int_converter.h   |  2 +
 libc/test/UnitTest/PrintfMatcher.cpp          |  6 +++
 .../src/stdio/printf_core/parser_test.cpp     | 36 ++++++++++++++++
 libc/test/src/stdio/sprintf_test.cpp          | 34 +++++++++++++++
 8 files changed, 133 insertions(+), 18 deletions(-)

diff --git a/libc/src/stdio/printf_core/converter_utils.h b/libc/src/stdio/printf_core/converter_utils.h
index 54f0a870d0ac4a..30eab6c4af8892 100644
--- a/libc/src/stdio/printf_core/converter_utils.h
+++ b/libc/src/stdio/printf_core/converter_utils.h
@@ -18,7 +18,9 @@
 namespace LIBC_NAMESPACE {
 namespace printf_core {
 
-LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
+LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num,
+                                            LengthSpec length_spec) {
+  auto [lm, bw] = length_spec;
   switch (lm) {
   case LengthModifier::none:
     return num & cpp::numeric_limits<unsigned int>::max();
@@ -40,6 +42,18 @@ LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
     return num & cpp::numeric_limits<uintptr_t>::max();
   case LengthModifier::j:
     return num; // j is intmax, so no mask is necessary.
+  case LengthModifier::w:
+  case LengthModifier::wf: {
+    uintmax_t mask;
+    if (bw == 0) {
+      mask = 0;
+    } else if (bw < sizeof(uintmax_t) * 8) {
+      mask = (static_cast<uintmax_t>(1) << bw) - 1;
+    } else {
+      mask = UINTMAX_MAX;
+    }
+    return num & mask;
+  }
   }
   __builtin_unreachable();
 }
diff --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h
index 7634d45568ab84..cd8bceb900de74 100644
--- a/libc/src/stdio/printf_core/core_structs.h
+++ b/libc/src/stdio/printf_core/core_structs.h
@@ -20,7 +20,12 @@ namespace printf_core {
 
 // These length modifiers match the length modifiers in the format string, which
 // is why they are formatted differently from the rest of the file.
-enum class LengthModifier { hh, h, l, ll, j, z, t, L, none };
+enum class LengthModifier { hh, h, l, ll, j, z, t, L, w, wf, none };
+
+struct LengthSpec {
+  LengthModifier lm;
+  size_t bit_width;
+};
 
 enum FormatFlags : uint8_t {
   LEFT_JUSTIFIED = 0x01, // -
@@ -42,6 +47,7 @@ struct FormatSection {
   // Format Specifier Values
   FormatFlags flags = FormatFlags(0);
   LengthModifier length_modifier = LengthModifier::none;
+  size_t bit_width = 0;
   int min_width = 0;
   int precision = -1;
 
@@ -64,6 +70,7 @@ struct FormatSection {
       if (!((static_cast<uint8_t>(flags) ==
              static_cast<uint8_t>(other.flags)) &&
             (min_width == other.min_width) && (precision == other.precision) &&
+            (bit_width == other.bit_width) &&
             (length_modifier == other.length_modifier) &&
             (conv_name == other.conv_name)))
         return false;
diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
index 2efbf53d409382..496e7bd1a56d94 100644
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -71,7 +71,6 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
   uintmax_t num = static_cast<uintmax_t>(to_conv.conv_val_raw);
   bool is_negative = false;
   FormatFlags flags = to_conv.flags;
-
   const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';
 
   // If the conversion is signed, then handle negative values.
@@ -89,8 +88,8 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
                         ~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
   }
 
-  num = apply_length_modifier(num, to_conv.length_modifier);
-
+  num =
+      apply_length_modifier(num, {to_conv.length_modifier, to_conv.bit_width});
   cpp::array<char, details::num_buf_size()> buf;
   auto str = details::num_to_strview(num, buf, to_conv.conv_name);
   if (!str)
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index 1e7d2e58c924a6..7a8a9b879dad84 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -136,10 +136,10 @@ template <typename ArgProvider> class Parser {
         }
       }
 
-      LengthModifier lm = parse_length_modifier(&cur_pos);
-
+      auto [lm, bw] = parse_length_modifier(&cur_pos);
       section.length_modifier = lm;
       section.conv_name = str[cur_pos];
+      section.bit_width = bw;
       switch (str[cur_pos]) {
       case ('%'):
         // Regardless of options, a % conversion is always safe. The standard
@@ -177,6 +177,8 @@ template <typename ArgProvider> class Parser {
           WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index);
           break;
         case (LengthModifier::j):
+        case (LengthModifier::w):
+        case (LengthModifier::wf):
 
           WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index);
           break;
@@ -273,38 +275,51 @@ template <typename ArgProvider> class Parser {
   // assumes that str[*local_pos] is inside a format specifier. It returns a
   // LengthModifier with the length modifier it found. It will advance local_pos
   // after the format specifier if one is found.
-  LIBC_INLINE LengthModifier parse_length_modifier(size_t *local_pos) {
+  LIBC_INLINE LengthSpec parse_length_modifier(size_t *local_pos) {
     switch (str[*local_pos]) {
     case ('l'):
       if (str[*local_pos + 1] == 'l') {
         *local_pos += 2;
-        return LengthModifier::ll;
+        return {LengthModifier::ll, 0};
+      } else {
+        ++*local_pos;
+        return {LengthModifier::l, 0};
+      }
+    case ('w'): {
+      LengthModifier lm;
+      if (str[*local_pos + 1] == 'f') {
+        *local_pos += 2;
+        lm = LengthModifier::wf;
       } else {
         ++*local_pos;
-        return LengthModifier::l;
+        lm = LengthModifier::w;
       }
+      const auto result = internal::strtointeger<size_t>(str + *local_pos, 10);
+      *local_pos += result.parsed_len;
+      return {lm, result.value};
+    }
     case ('h'):
       if (str[*local_pos + 1] == 'h') {
         *local_pos += 2;
-        return LengthModifier::hh;
+        return {LengthModifier::hh, 0};
       } else {
         ++*local_pos;
-        return LengthModifier::h;
+        return {LengthModifier::h, 0};
       }
     case ('L'):
       ++*local_pos;
-      return LengthModifier::L;
+      return {LengthModifier::L, 0};
     case ('j'):
       ++*local_pos;
-      return LengthModifier::j;
+      return {LengthModifier::j, 0};
     case ('z'):
       ++*local_pos;
-      return LengthModifier::z;
+      return {LengthModifier::z, 0};
     case ('t'):
       ++*local_pos;
-      return LengthModifier::t;
+      return {LengthModifier::t, 0};
     default:
-      return LengthModifier::none;
+      return {LengthModifier::none, 0};
     }
   }
 
@@ -460,7 +475,7 @@ template <typename ArgProvider> class Parser {
           }
         }
 
-        LengthModifier lm = parse_length_modifier(&local_pos);
+        auto [lm, bw] = parse_length_modifier(&local_pos);
 
         // if we don't have an index for this conversion, then its position is
         // unknown and all this information is irrelevant. The rest of this
@@ -503,6 +518,8 @@ template <typename ArgProvider> class Parser {
             conv_size = type_desc_from_type<long long>();
             break;
           case (LengthModifier::j):
+          case (LengthModifier::w):
+          case (LengthModifier::wf):
             conv_size = type_desc_from_type<intmax_t>();
             break;
           case (LengthModifier::z):
diff --git a/libc/src/stdio/printf_core/write_int_converter.h b/libc/src/stdio/printf_core/write_int_converter.h
index 0310905f36f146..18aa5c79897ec4 100644
--- a/libc/src/stdio/printf_core/write_int_converter.h
+++ b/libc/src/stdio/printf_core/write_int_converter.h
@@ -55,6 +55,8 @@ LIBC_INLINE int convert_write_int(Writer *writer,
     *reinterpret_cast<ptrdiff_t *>(to_conv.conv_val_ptr) = written;
     break;
   case LengthModifier::j:
+  case LengthModifier::w:
+  case LengthModifier::wf:
     *reinterpret_cast<uintmax_t *>(to_conv.conv_val_ptr) = written;
     break;
   }
diff --git a/libc/test/UnitTest/PrintfMatcher.cpp b/libc/test/UnitTest/PrintfMatcher.cpp
index 32f3be73307e3a..c8303815c92296 100644
--- a/libc/test/UnitTest/PrintfMatcher.cpp
+++ b/libc/test/UnitTest/PrintfMatcher.cpp
@@ -39,6 +39,10 @@ namespace {
   case (LengthModifier::lm):                                                   \
     tlog << #lm;                                                               \
     break
+#define CASE_LM_BIT_WIDTH(lm, bw)                                              \
+  case (LengthModifier::lm):                                                   \
+    tlog << #lm << "\n\tbit width: :" << bw;                                   \
+    break
 
 static void display(FormatSection form) {
   tlog << "Raw String (len " << form.raw_string.size() << "): \"";
@@ -67,6 +71,8 @@ static void display(FormatSection form) {
       CASE_LM(z);
       CASE_LM(t);
       CASE_LM(L);
+      CASE_LM_BIT_WIDTH(w, form.bit_width);
+      CASE_LM_BIT_WIDTH(wf, form.bit_width);
     }
     tlog << "\n";
     tlog << "\tconversion name: " << form.conv_name << "\n";
diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp
index 0134277c4a1b2d..66d6dd0a86c422 100644
--- a/libc/test/src/stdio/printf_core/parser_test.cpp
+++ b/libc/test/src/stdio/printf_core/parser_test.cpp
@@ -223,6 +223,42 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) {
   ASSERT_PFORMAT_EQ(expected, format_arr[0]);
 }
 
+TEST(LlvmLibcPrintfParserTest, EvalOneArgWithBitWidthLengthModifier) {
+  LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
+  const char *str = "%w32d";
+  long long arg1 = 12345;
+  evaluate(format_arr, str, arg1);
+
+  LIBC_NAMESPACE::printf_core::FormatSection expected;
+  expected.has_conv = true;
+
+  expected.raw_string = {str, 5};
+  expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::w;
+  expected.bit_width = 32;
+  expected.conv_val_raw = arg1;
+  expected.conv_name = 'd';
+
+  ASSERT_PFORMAT_EQ(expected, format_arr[0]);
+}
+
+TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFastBitWidthLengthModifier) {
+  LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
+  const char *str = "%wf32d";
+  long long arg1 = 12345;
+  evaluate(format_arr, str, arg1);
+
+  LIBC_NAMESPACE::printf_core::FormatSection expected;
+  expected.has_conv = true;
+
+  expected.raw_string = {str, 6};
+  expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::wf;
+  expected.bit_width = 32;
+  expected.conv_val_raw = arg1;
+  expected.conv_name = 'd';
+
+  ASSERT_PFORMAT_EQ(expected, format_arr[0]);
+}
+
 TEST(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) {
   LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
   const char *str = "% -056.78jd";
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 186b37e2898af6..8d61dc3d5dd686 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -169,6 +169,40 @@ TEST(LlvmLibcSPrintfTest, IntConv) {
   EXPECT_EQ(written, 20);
   ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
 
+  written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 5807);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "7");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 1);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "1");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%w64u", 18446744073709551615ull);
+  EXPECT_EQ(written, 20);
+  ASSERT_STREQ(buff, "18446744073709551615"); // ull max
+
+  written =
+      LIBC_NAMESPACE::sprintf(buff, "%w64d", -9223372036854775807ll - 1ll);
+  EXPECT_EQ(written, 20);
+  ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 5807);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "7");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 1);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "1");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%wf64u", 18446744073709551615ull);
+  EXPECT_EQ(written, 20);
+  ASSERT_STREQ(buff, "18446744073709551615"); // ull max
+
+  written =
+      LIBC_NAMESPACE::sprintf(buff, "%wf64d", -9223372036854775807ll - 1ll);
+  EXPECT_EQ(written, 20);
+  ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+
   // Min Width Tests.
 
   written = LIBC_NAMESPACE::sprintf(buff, "%4d", 789);