[libc-commits] [libc] [libc] Support C23 'b' (binary) modifier in printf (PR #80851)

Artem Tyurin via libc-commits libc-commits at lists.llvm.org
Tue Feb 6 12:30:40 PST 2024


https://github.com/agentcooper updated https://github.com/llvm/llvm-project/pull/80851

>From ef3ebe50003dedd1a69f5560967050ef8903b509 Mon Sep 17 00:00:00 2001
From: Artem Tyurin <artem.tyurin at gmail.com>
Date: Tue, 6 Feb 2024 16:36:15 +0100
Subject: [PATCH 1/5] [libc] Support C23 'b' (binary) modifier in printf

Reference: https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2612.pdf.

Fixes https://github.com/llvm/llvm-project/issues/80727.
---
 libc/src/stdio/printf_core/converter.cpp           |  2 ++
 libc/src/stdio/printf_core/int_converter.h         |  6 +++++-
 libc/src/stdio/printf_core/parser.h                |  4 ++++
 libc/test/src/stdio/printf_core/converter_test.cpp | 14 ++++++++++++++
 4 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/libc/src/stdio/printf_core/converter.cpp b/libc/src/stdio/printf_core/converter.cpp
index 74a36cbf7432f..52412aef3c5c1 100644
--- a/libc/src/stdio/printf_core/converter.cpp
+++ b/libc/src/stdio/printf_core/converter.cpp
@@ -58,6 +58,8 @@ int convert(Writer *writer, const FormatSection &to_conv) {
   case 'o':
   case 'x':
   case 'X':
+  case 'b':
+  case 'B':
     return convert_int(writer, to_conv);
 #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
   case 'f':
diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
index 7744d801cbc18..8520f12c0372c 100644
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -33,6 +33,7 @@ using HexFmt = IntegerToString<uintmax_t, radix::Hex>;
 using HexFmtUppercase = IntegerToString<uintmax_t, radix::Hex::Uppercase>;
 using OctFmt = IntegerToString<uintmax_t, radix::Oct>;
 using DecFmt = IntegerToString<uintmax_t>;
+using BinFmt = IntegerToString<uintmax_t, radix::Bin>;
 
 LIBC_INLINE constexpr size_t num_buf_size() {
   constexpr auto max = [](size_t a, size_t b) -> size_t {
@@ -40,7 +41,8 @@ LIBC_INLINE constexpr size_t num_buf_size() {
   };
   return max(HexFmt::buffer_size(),
              max(HexFmtUppercase::buffer_size(),
-                 max(OctFmt::buffer_size(), DecFmt::buffer_size())));
+                 max(OctFmt::buffer_size(),
+                     max(DecFmt::buffer_size(), BinFmt::buffer_size()))));
 }
 
 LIBC_INLINE cpp::optional<cpp::string_view>
@@ -52,6 +54,8 @@ num_to_strview(uintmax_t num, cpp::span<char> bufref, char conv_name) {
       return HexFmtUppercase::format_to(bufref, num);
   } else if (conv_name == 'o') {
     return OctFmt::format_to(bufref, num);
+  } else if (to_lower(conv_name) == 'b') {
+    return BinFmt::format_to(bufref, num);
   } else {
     return DecFmt::format_to(bufref, num);
   }
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index ab491655275fb..1e7d2e58c924a 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -159,6 +159,8 @@ template <typename ArgProvider> class Parser {
       case ('x'):
       case ('X'):
       case ('u'):
+      case ('b'):
+      case ('B'):
         switch (lm) {
         case (LengthModifier::hh):
         case (LengthModifier::h):
@@ -484,6 +486,8 @@ template <typename ArgProvider> class Parser {
         case ('x'):
         case ('X'):
         case ('u'):
+        case ('b'):
+        case ('B'):
           switch (lm) {
           case (LengthModifier::hh):
           case (LengthModifier::h):
diff --git a/libc/test/src/stdio/printf_core/converter_test.cpp b/libc/test/src/stdio/printf_core/converter_test.cpp
index 8404ef6ec7db4..9da749f3b8ad1 100644
--- a/libc/test/src/stdio/printf_core/converter_test.cpp
+++ b/libc/test/src/stdio/printf_core/converter_test.cpp
@@ -210,6 +210,20 @@ TEST_F(LlvmLibcPrintfConverterTest, HexConversion) {
   ASSERT_EQ(writer.get_chars_written(), 18);
 }
 
+TEST_F(LlvmLibcPrintfConverterTest, BinaryConversion) {
+  LIBC_NAMESPACE::printf_core::FormatSection section;
+  section.has_conv = true;
+  section.raw_string = "%b";
+  section.conv_name = 'b';
+  section.conv_val_raw = 42;
+  LIBC_NAMESPACE::printf_core::convert(&writer, section);
+
+  wb.buff[wb.buff_cur] = '\0';
+
+  ASSERT_STREQ(str, "101010");
+  ASSERT_EQ(writer.get_chars_written(), 6);
+}
+
 TEST_F(LlvmLibcPrintfConverterTest, PointerConversion) {
 
   LIBC_NAMESPACE::printf_core::FormatSection section;

>From f1672b5d8b8febacc4b6873d76d0b0b9ebf7df82 Mon Sep 17 00:00:00 2001
From: Artem Tyurin <artem.tyurin at gmail.com>
Date: Tue, 6 Feb 2024 20:35:53 +0100
Subject: [PATCH 2/5] Handle uppercase, refactor num_buf_size, add more tests

---
 libc/src/stdio/printf_core/int_converter.h |  27 +++--
 libc/test/src/stdio/sprintf_test.cpp       | 109 +++++++++++++++++++++
 2 files changed, 128 insertions(+), 8 deletions(-)

diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
index 8520f12c0372c..cd0826ff529c1 100644
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -34,15 +34,19 @@ using HexFmtUppercase = IntegerToString<uintmax_t, radix::Hex::Uppercase>;
 using OctFmt = IntegerToString<uintmax_t, radix::Oct>;
 using DecFmt = IntegerToString<uintmax_t>;
 using BinFmt = IntegerToString<uintmax_t, radix::Bin>;
+using BinFmtUppercase = IntegerToString<uintmax_t, radix::Bin::Uppercase>;
 
 LIBC_INLINE constexpr size_t num_buf_size() {
-  constexpr auto max = [](size_t a, size_t b) -> size_t {
-    return (a < b) ? b : a;
-  };
-  return max(HexFmt::buffer_size(),
-             max(HexFmtUppercase::buffer_size(),
-                 max(OctFmt::buffer_size(),
-                     max(DecFmt::buffer_size(), BinFmt::buffer_size()))));
+  cpp::array<size_t, 5> sizes{
+      HexFmt::buffer_size(), HexFmtUppercase::buffer_size(),
+      OctFmt::buffer_size(), DecFmt::buffer_size(),
+      BinFmt::buffer_size(), BinFmtUppercase::buffer_size()};
+
+  auto result = sizes[0];
+  for (size_t i = 1; i < sizes.size(); i++)
+    result = cpp::max(result, sizes[i]);
+}
+return result;
 }
 
 LIBC_INLINE cpp::optional<cpp::string_view>
@@ -54,8 +58,10 @@ num_to_strview(uintmax_t num, cpp::span<char> bufref, char conv_name) {
       return HexFmtUppercase::format_to(bufref, num);
   } else if (conv_name == 'o') {
     return OctFmt::format_to(bufref, num);
-  } else if (to_lower(conv_name) == 'b') {
+  } else if (conv_name == 'b') {
     return BinFmt::format_to(bufref, num);
+  } else if (conv_name == 'B') {
+    return BinFmtUppercase::format_to(bufref, num);
   } else {
     return DecFmt::format_to(bufref, num);
   }
@@ -120,6 +126,11 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) {
     prefix_len = 2;
     prefix[0] = '0';
     prefix[1] = a + ('x' - 'a');
+  } else if ((to_lower(to_conv.conv_name) == 'b') &&
+             ((flags & FormatFlags::ALTERNATE_FORM) != 0) && num != 0) {
+    prefix_len = 2;
+    prefix[0] = '0';
+    prefix[1] = a + ('b' - 'a');
   } else {
     prefix_len = (sign_char == 0 ? 0 : 1);
     prefix[0] = sign_char;
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index f3614b05a0c3e..8c3167b957c7d 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -410,6 +410,115 @@ TEST(LlvmLibcSPrintfTest, HexConv) {
   ASSERT_STREQ(buff, "007F 0x1000000000 002   ");
 }
 
+TEST(LlvmLibcSPrintfTest, BinConv) {
+  char buff[64];
+  int written;
+
+  // Basic Tests.
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%b", 42);
+  EXPECT_EQ(written, 6);
+  ASSERT_STREQ(buff, "101010");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%B", 12081991);
+  EXPECT_EQ(written, 24);
+  ASSERT_STREQ(buff, "101110000101101101000111");
+
+  // Min Width Tests.
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%10b", 0b101010);
+  EXPECT_EQ(written, 10);
+  ASSERT_STREQ(buff, "    101010");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%2B", 0b101010);
+  EXPECT_EQ(written, 6);
+  ASSERT_STREQ(buff, "101010");
+
+  // Precision Tests.
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%b", 0);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "0");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%.0b", 0);
+  EXPECT_EQ(written, 0);
+  ASSERT_STREQ(buff, "");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%.5b", 0b111);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "00111");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%.2b", 0b111);
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ(buff, "111");
+
+  // Flag Tests.
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%-5b", 0b111);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "111  ");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%#b", 0b111);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "0b111");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%#b", 0);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "0");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%#B", 0b111);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "0B111");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%05b", 0b111);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "00111");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%0#6b", 0b111);
+  EXPECT_EQ(written, 6);
+  ASSERT_STREQ(buff, "0b0111");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%-#6b", 0b111);
+  EXPECT_EQ(written, 6);
+  ASSERT_STREQ(buff, "0b111 ");
+
+  // Combined Tests.
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%#-07b", 0b111);
+  EXPECT_EQ(written, 7);
+  ASSERT_STREQ(buff, "0b111  ");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%7.5b", 0b111);
+  EXPECT_EQ(written, 7);
+  ASSERT_STREQ(buff, "  00111");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%#9.5B", 0b111);
+  EXPECT_EQ(written, 9);
+  ASSERT_STREQ(buff, "  0B00111");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%#.b", 0);
+  EXPECT_EQ(written, 0);
+  ASSERT_STREQ(buff, "");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%-7.5b", 0b111);
+  EXPECT_EQ(written, 7);
+  ASSERT_STREQ(buff, "00111  ");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%5.4b", 0b11111);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, " 0111");
+
+  // Multiple Conversion Tests.
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%10B %-#10b", 0b101, 0b110);
+  EXPECT_EQ(written, 21);
+  ASSERT_STREQ(buff, "       101 0b110     ");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%-5.4b%#.4b", 0b101, 0b110);
+  EXPECT_EQ(written, 11);
+  ASSERT_STREQ(buff, "0101 0b0110");
+}
+
 TEST(LlvmLibcSPrintfTest, PointerConv) {
   char buff[64];
   int written;

>From 097f8a732652afca61847e13232811ede1efa284 Mon Sep 17 00:00:00 2001
From: Artem Tyurin <artem.tyurin at gmail.com>
Date: Tue, 6 Feb 2024 20:36:50 +0100
Subject: [PATCH 3/5] Fix array size

---
 libc/src/stdio/printf_core/int_converter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
index cd0826ff529c1..49a308c95d4d6 100644
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -37,7 +37,7 @@ using BinFmt = IntegerToString<uintmax_t, radix::Bin>;
 using BinFmtUppercase = IntegerToString<uintmax_t, radix::Bin::Uppercase>;
 
 LIBC_INLINE constexpr size_t num_buf_size() {
-  cpp::array<size_t, 5> sizes{
+  cpp::array<size_t, 6> sizes{
       HexFmt::buffer_size(), HexFmtUppercase::buffer_size(),
       OctFmt::buffer_size(), DecFmt::buffer_size(),
       BinFmt::buffer_size(), BinFmtUppercase::buffer_size()};

>From b3207578e3a2cc6b0aa90b1104fd3977b0b49f42 Mon Sep 17 00:00:00 2001
From: Artem Tyurin <artem.tyurin at gmail.com>
Date: Tue, 6 Feb 2024 21:20:01 +0100
Subject: [PATCH 4/5] Binary does not need uppercase

---
 libc/src/stdio/printf_core/int_converter.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
index 49a308c95d4d6..ce8acd8724443 100644
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -34,13 +34,11 @@ using HexFmtUppercase = IntegerToString<uintmax_t, radix::Hex::Uppercase>;
 using OctFmt = IntegerToString<uintmax_t, radix::Oct>;
 using DecFmt = IntegerToString<uintmax_t>;
 using BinFmt = IntegerToString<uintmax_t, radix::Bin>;
-using BinFmtUppercase = IntegerToString<uintmax_t, radix::Bin::Uppercase>;
 
 LIBC_INLINE constexpr size_t num_buf_size() {
-  cpp::array<size_t, 6> sizes{
+  cpp::array<size_t, 5> sizes{
       HexFmt::buffer_size(), HexFmtUppercase::buffer_size(),
-      OctFmt::buffer_size(), DecFmt::buffer_size(),
-      BinFmt::buffer_size(), BinFmtUppercase::buffer_size()};
+      OctFmt::buffer_size(), DecFmt::buffer_size(), BinFmt::buffer_size()};
 
   auto result = sizes[0];
   for (size_t i = 1; i < sizes.size(); i++)
@@ -58,10 +56,8 @@ num_to_strview(uintmax_t num, cpp::span<char> bufref, char conv_name) {
       return HexFmtUppercase::format_to(bufref, num);
   } else if (conv_name == 'o') {
     return OctFmt::format_to(bufref, num);
-  } else if (conv_name == 'b') {
+  } else if (to_lower(conv_name) == 'b') {
     return BinFmt::format_to(bufref, num);
-  } else if (conv_name == 'B') {
-    return BinFmtUppercase::format_to(bufref, num);
   } else {
     return DecFmt::format_to(bufref, num);
   }

>From 3142fdb3d775678de15e060af87f6bf09aa7443e Mon Sep 17 00:00:00 2001
From: Artem Tyurin <artem.tyurin at gmail.com>
Date: Tue, 6 Feb 2024 21:30:26 +0100
Subject: [PATCH 5/5] Fix return statement

---
 libc/src/stdio/printf_core/int_converter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
index ce8acd8724443..050b697357602 100644
--- a/libc/src/stdio/printf_core/int_converter.h
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -43,8 +43,8 @@ LIBC_INLINE constexpr size_t num_buf_size() {
   auto result = sizes[0];
   for (size_t i = 1; i < sizes.size(); i++)
     result = cpp::max(result, sizes[i]);
+  return result;
 }
-return result;
 }
 
 LIBC_INLINE cpp::optional<cpp::string_view>



More information about the libc-commits mailing list