[libc-commits] [libc] Add bit width length modifier to printf (PR #82461)

Om Prakaash via libc-commits libc-commits at lists.llvm.org
Mon Mar 25 08:35:03 PDT 2024


https://github.com/omprakaash updated https://github.com/llvm/llvm-project/pull/82461

>From 238768b0587c9662a53affe5fb3515f2ae326620 Mon Sep 17 00:00:00 2001
From: om <omsuseela at gmail.com>
Date: Mon, 19 Feb 2024 15:49:34 -0800
Subject: [PATCH] Add bit width length modifier to printf

---
 libc/src/stdio/printf_core/converter_utils.h  | 16 ++++-
 libc/src/stdio/printf_core/core_structs.h     |  9 ++-
 libc/src/stdio/printf_core/int_converter.h    |  5 +-
 libc/src/stdio/printf_core/parser.h           | 65 +++++++++++++----
 .../stdio/printf_core/write_int_converter.h   |  2 +
 libc/test/UnitTest/PrintfMatcher.cpp          |  6 ++
 .../src/stdio/printf_core/parser_test.cpp     | 36 ++++++++++
 libc/test/src/stdio/sprintf_test.cpp          | 71 +++++++++++++++++++
 8 files changed, 191 insertions(+), 19 deletions(-)

diff --git a/libc/src/stdio/printf_core/converter_utils.h b/libc/src/stdio/printf_core/converter_utils.h
index 54f0a870d0ac4a..a4bd3e667def35 100644
--- a/libc/src/stdio/printf_core/converter_utils.h
+++ b/libc/src/stdio/printf_core/converter_utils.h
@@ -18,7 +18,9 @@
 namespace LIBC_NAMESPACE {
 namespace printf_core {
 
-LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
+LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num,
+                                            LengthSpec length_spec) {
+  auto [lm, bw] = length_spec;
   switch (lm) {
   case LengthModifier::none:
     return num & cpp::numeric_limits<unsigned int>::max();
@@ -40,6 +42,18 @@ LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
     return num & cpp::numeric_limits<uintptr_t>::max();
   case LengthModifier::j:
     return num; // j is intmax, so no mask is necessary.
+  case LengthModifier::w:
+  case LengthModifier::wf: {
+    uintmax_t mask;
+    if (bw == 0) {
+      mask = 0;
+    } else if (bw < sizeof(uintmax_t) * CHAR_BIT) {
+      mask = (static_cast<uintmax_t>(1) << bw) - 1;
+    } else {
+      mask = UINTMAX_MAX;
+    }
+    return num & mask;
+  }
   }
   __builtin_unreachable();
 }
diff --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h
index 7634d45568ab84..cd8bceb900de74 100644
--- a/libc/src/stdio/printf_core/core_structs.h
+++ b/libc/src/stdio/printf_core/core_structs.h
@@ -20,7 +20,12 @@ namespace printf_core {
 
 // These length modifiers match the length modifiers in the format string, which
 // is why they are formatted differently from the rest of the file.
-enum class LengthModifier { hh, h, l, ll, j, z, t, L, none };
+enum class LengthModifier { hh, h, l, ll, j, z, t, L, w, wf, none };
+
+struct LengthSpec {
+  LengthModifier lm;
+  size_t bit_width;
+};
 
 enum FormatFlags : uint8_t {
   LEFT_JUSTIFIED = 0x01, // -
@@ -42,6 +47,7 @@ struct FormatSection {
   // Format Specifier Values
   FormatFlags flags = FormatFlags(0);
   LengthModifier length_modifier = LengthModifier::none;
+  size_t bit_width = 0;
   int min_width = 0;
   int precision = -1;
 
@@ -64,6 +70,7 @@ struct FormatSection {
       if (!((static_cast<uint8_t>(flags) ==
              static_cast<uint8_t>(other.flags)) &&
             (min_width == other.min_width) && (precision == other.precision) &&
+            (bit_width == other.bit_width) &&
             (length_modifier == other.length_modifier) &&
             (conv_name == other.conv_name)))
         return false;
diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
index 2efbf53d409382..496e7bd1a56d94 100644
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -71,7 +71,6 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
   uintmax_t num = static_cast<uintmax_t>(to_conv.conv_val_raw);
   bool is_negative = false;
   FormatFlags flags = to_conv.flags;
-
   const char a = is_lower(to_conv.conv_name) ? 'a' : 'A';
 
   // If the conversion is signed, then handle negative values.
@@ -89,8 +88,8 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
                         ~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
   }
 
-  num = apply_length_modifier(num, to_conv.length_modifier);
-
+  num =
+      apply_length_modifier(num, {to_conv.length_modifier, to_conv.bit_width});
   cpp::array<char, details::num_buf_size()> buf;
   auto str = details::num_to_strview(num, buf, to_conv.conv_name);
   if (!str)
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index 1e7d2e58c924a6..ea255f9b2816d6 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -13,7 +13,6 @@
 #include "src/__support/str_to_integer.h"
 #include "src/stdio/printf_core/core_structs.h"
 #include "src/stdio/printf_core/printf_config.h"
-
 #include <stddef.h>
 
 namespace LIBC_NAMESPACE {
@@ -136,10 +135,10 @@ template <typename ArgProvider> class Parser {
         }
       }
 
-      LengthModifier lm = parse_length_modifier(&cur_pos);
-
+      auto [lm, bw] = parse_length_modifier(&cur_pos);
       section.length_modifier = lm;
       section.conv_name = str[cur_pos];
+      section.bit_width = bw;
       switch (str[cur_pos]) {
       case ('%'):
         // Regardless of options, a % conversion is always safe. The standard
@@ -188,6 +187,19 @@ template <typename ArgProvider> class Parser {
 
           WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, ptrdiff_t, conv_index);
           break;
+
+        case (LengthModifier::w):
+        case (LengthModifier::wf):
+          if (bw <= INT_WIDTH) {
+            WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
+          } else if (bw <= LONG_WIDTH) {
+            WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long, conv_index);
+          } else if (bw <= LLONG_WIDTH) {
+            WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index);
+          } else {
+            WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index);
+          }
+          break;
         }
         break;
 #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
@@ -273,38 +285,51 @@ template <typename ArgProvider> class Parser {
   // assumes that str[*local_pos] is inside a format specifier. It returns a
   // LengthModifier with the length modifier it found. It will advance local_pos
   // after the format specifier if one is found.
-  LIBC_INLINE LengthModifier parse_length_modifier(size_t *local_pos) {
+  LIBC_INLINE LengthSpec parse_length_modifier(size_t *local_pos) {
     switch (str[*local_pos]) {
     case ('l'):
       if (str[*local_pos + 1] == 'l') {
         *local_pos += 2;
-        return LengthModifier::ll;
+        return {LengthModifier::ll, 0};
       } else {
         ++*local_pos;
-        return LengthModifier::l;
+        return {LengthModifier::l, 0};
       }
+    case ('w'): {
+      LengthModifier lm;
+      if (str[*local_pos + 1] == 'f') {
+        *local_pos += 2;
+        lm = LengthModifier::wf;
+      } else {
+        ++*local_pos;
+        lm = LengthModifier::w;
+      }
+      const auto result = internal::strtointeger<int>(str + *local_pos, 10);
+      *local_pos += result.parsed_len;
+      return {lm, static_cast<size_t>(cpp::max(0, result.value))};
+    }
     case ('h'):
       if (str[*local_pos + 1] == 'h') {
         *local_pos += 2;
-        return LengthModifier::hh;
+        return {LengthModifier::hh, 0};
       } else {
         ++*local_pos;
-        return LengthModifier::h;
+        return {LengthModifier::h, 0};
       }
     case ('L'):
       ++*local_pos;
-      return LengthModifier::L;
+      return {LengthModifier::L, 0};
     case ('j'):
       ++*local_pos;
-      return LengthModifier::j;
+      return {LengthModifier::j, 0};
     case ('z'):
       ++*local_pos;
-      return LengthModifier::z;
+      return {LengthModifier::z, 0};
     case ('t'):
       ++*local_pos;
-      return LengthModifier::t;
+      return {LengthModifier::t, 0};
     default:
-      return LengthModifier::none;
+      return {LengthModifier::none, 0};
     }
   }
 
@@ -460,7 +485,7 @@ template <typename ArgProvider> class Parser {
           }
         }
 
-        LengthModifier lm = parse_length_modifier(&local_pos);
+        auto [lm, bw] = parse_length_modifier(&local_pos);
 
         // if we don't have an index for this conversion, then its position is
         // unknown and all this information is irrelevant. The rest of this
@@ -511,6 +536,18 @@ template <typename ArgProvider> class Parser {
           case (LengthModifier::t):
             conv_size = type_desc_from_type<ptrdiff_t>();
             break;
+          case (LengthModifier::w):
+          case (LengthModifier::wf):
+            if (bw <= INT_WIDTH) {
+              conv_size = type_desc_from_type<int>();
+            } else if (bw <= LONG_WIDTH) {
+              conv_size = type_desc_from_type<long>();
+            } else if (bw <= LLONG_WIDTH) {
+              conv_size = type_desc_from_type<long long>();
+            } else {
+              conv_size = type_desc_from_type<intmax_t>();
+            }
+            break;
           }
           break;
 #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
diff --git a/libc/src/stdio/printf_core/write_int_converter.h b/libc/src/stdio/printf_core/write_int_converter.h
index 0310905f36f146..18aa5c79897ec4 100644
--- a/libc/src/stdio/printf_core/write_int_converter.h
+++ b/libc/src/stdio/printf_core/write_int_converter.h
@@ -55,6 +55,8 @@ LIBC_INLINE int convert_write_int(Writer *writer,
     *reinterpret_cast<ptrdiff_t *>(to_conv.conv_val_ptr) = written;
     break;
   case LengthModifier::j:
+  case LengthModifier::w:
+  case LengthModifier::wf:
     *reinterpret_cast<uintmax_t *>(to_conv.conv_val_ptr) = written;
     break;
   }
diff --git a/libc/test/UnitTest/PrintfMatcher.cpp b/libc/test/UnitTest/PrintfMatcher.cpp
index 32f3be73307e3a..c8303815c92296 100644
--- a/libc/test/UnitTest/PrintfMatcher.cpp
+++ b/libc/test/UnitTest/PrintfMatcher.cpp
@@ -39,6 +39,10 @@ namespace {
   case (LengthModifier::lm):                                                   \
     tlog << #lm;                                                               \
     break
+#define CASE_LM_BIT_WIDTH(lm, bw)                                              \
+  case (LengthModifier::lm):                                                   \
+    tlog << #lm << "\n\tbit width: :" << bw;                                   \
+    break
 
 static void display(FormatSection form) {
   tlog << "Raw String (len " << form.raw_string.size() << "): \"";
@@ -67,6 +71,8 @@ static void display(FormatSection form) {
       CASE_LM(z);
       CASE_LM(t);
       CASE_LM(L);
+      CASE_LM_BIT_WIDTH(w, form.bit_width);
+      CASE_LM_BIT_WIDTH(wf, form.bit_width);
     }
     tlog << "\n";
     tlog << "\tconversion name: " << form.conv_name << "\n";
diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp
index 0134277c4a1b2d..66d6dd0a86c422 100644
--- a/libc/test/src/stdio/printf_core/parser_test.cpp
+++ b/libc/test/src/stdio/printf_core/parser_test.cpp
@@ -223,6 +223,42 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) {
   ASSERT_PFORMAT_EQ(expected, format_arr[0]);
 }
 
+TEST(LlvmLibcPrintfParserTest, EvalOneArgWithBitWidthLengthModifier) {
+  LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
+  const char *str = "%w32d";
+  long long arg1 = 12345;
+  evaluate(format_arr, str, arg1);
+
+  LIBC_NAMESPACE::printf_core::FormatSection expected;
+  expected.has_conv = true;
+
+  expected.raw_string = {str, 5};
+  expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::w;
+  expected.bit_width = 32;
+  expected.conv_val_raw = arg1;
+  expected.conv_name = 'd';
+
+  ASSERT_PFORMAT_EQ(expected, format_arr[0]);
+}
+
+TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFastBitWidthLengthModifier) {
+  LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
+  const char *str = "%wf32d";
+  long long arg1 = 12345;
+  evaluate(format_arr, str, arg1);
+
+  LIBC_NAMESPACE::printf_core::FormatSection expected;
+  expected.has_conv = true;
+
+  expected.raw_string = {str, 6};
+  expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::wf;
+  expected.bit_width = 32;
+  expected.conv_val_raw = arg1;
+  expected.conv_name = 'd';
+
+  ASSERT_PFORMAT_EQ(expected, format_arr[0]);
+}
+
 TEST(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) {
   LIBC_NAMESPACE::printf_core::FormatSection format_arr[10];
   const char *str = "% -056.78jd";
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 186b37e2898af6..7e41a30047f534 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -12,6 +12,7 @@
 #include "test/UnitTest/RoundingModeUtils.h"
 #include "test/UnitTest/Test.h"
 #include <inttypes.h>
+#include <src/string/strlen.h>
 
 // TODO: Add a comment here explaining the printf format string.
 
@@ -169,6 +170,76 @@ TEST(LlvmLibcSPrintfTest, IntConv) {
   EXPECT_EQ(written, 20);
   ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
 
+  written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 5807);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "7");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 1);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "1");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%w64d", 9223372036854775807l);
+  EXPECT_EQ(written, 19);
+  ASSERT_STREQ(buff, "9223372036854775807");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%w-1d", 5807);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "0");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%w0d", 5807);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "0");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%w999d", 9223372036854775807l);
+  EXPECT_EQ(written, 19);
+  ASSERT_STREQ(buff, "9223372036854775807");
+
+  char format[64];
+  char uintmax[128];
+  LIBC_NAMESPACE::sprintf(format, "%%w%du", UINTMAX_WIDTH);
+  const int uintmax_len = LIBC_NAMESPACE::sprintf(uintmax, "%ju", UINTMAX_MAX);
+  written = LIBC_NAMESPACE::sprintf(buff, format, UINTMAX_MAX);
+  EXPECT_EQ(written, uintmax_len);
+  ASSERT_STREQ(buff, uintmax);
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%w64u", 18446744073709551615ull);
+  EXPECT_EQ(written, 20);
+  ASSERT_STREQ(buff, "18446744073709551615"); // ull max
+
+  written =
+      LIBC_NAMESPACE::sprintf(buff, "%w64d", -9223372036854775807ll - 1ll);
+  EXPECT_EQ(written, 20);
+  ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 5807);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "7");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 1);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "1");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%wf64u", 18446744073709551615ull);
+  EXPECT_EQ(written, 20);
+  ASSERT_STREQ(buff, "18446744073709551615"); // ull max
+
+  written =
+      LIBC_NAMESPACE::sprintf(buff, "%wf64d", -9223372036854775807ll - 1ll);
+  EXPECT_EQ(written, 20);
+  ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%wf0d", 5807);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "0");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%wf-1d", 5807);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "0");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%wf999d", 5807);
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ(buff, "5807");
+
   // Min Width Tests.
 
   written = LIBC_NAMESPACE::sprintf(buff, "%4d", 789);



More information about the libc-commits mailing list