[libc-commits] [libc] [libc] Support ls in printf (PR #178841)

Shubh Pachchigar via libc-commits libc-commits at lists.llvm.org
Thu Feb 26 22:37:17 PST 2026


https://github.com/shubhe25p updated https://github.com/llvm/llvm-project/pull/178841

>From 10171584b2a1f8bc6b9bbb1da9e5981fff82ed45 Mon Sep 17 00:00:00 2001
From: shubhe25p <shubhp at mbm3a24.local>
Date: Thu, 26 Feb 2026 22:35:21 -0800
Subject: [PATCH] [libc] Support %ls in printf

Add support for %ls in printf by calling the internal
string converter, and add a corresponding end-to-end
sprintf test. Additionally, modify the printf
parser to recognize the length modifier. This
also disables wide string support on Windows
and other unsupported platforms.
---
 libc/src/stdio/printf_core/CMakeLists.txt     |   5 +
 libc/src/stdio/printf_core/parser.h           |   2 +-
 libc/src/stdio/printf_core/string_converter.h |  93 ++++++++++++++--
 libc/test/src/stdio/sprintf_test.cpp          | 103 ++++++++++++++++++
 4 files changed, 195 insertions(+), 8 deletions(-)

diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index ae93cc754299b..2009e564da1df 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -49,10 +49,13 @@ if(LIBC_CONF_PRINTF_DISABLE_WIDE)
 else()
   set(wchar_deps
     libc.hdr.types.wchar_t
+    libc.hdr.types.char32_t
+    libc.hdr.types.char8_t
     libc.hdr.types.wint_t
     libc.hdr.wchar_macros
     libc.src.__support.wchar.wcrtomb
     libc.src.__support.wchar.mbstate
+    libc.src.__support.wchar.string_converter
   )
   set(parser_wchar_deps
     libc.hdr.types.wint_t
@@ -129,8 +132,10 @@ add_header_library(
     .writer
     libc.include.inttypes
     libc.hdr.limits_macros
+    libc.src.string.string_utils
     libc.src.__support.big_int
     libc.src.__support.common
+    libc.src.__support.CPP.algorithm
     libc.src.__support.CPP.limits
     libc.src.__support.CPP.span
     libc.src.__support.CPP.string_view
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index a3b62991bcec9..7762aa1d87fce 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -290,7 +290,7 @@ template <typename ArgProvider> class Parser {
         WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index);
         break;
       case ('s'):
-        WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, char *, conv_index);
+        WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index);
         break;
       default:
         // if the conversion is undefined, change this to a raw section.
diff --git a/libc/src/stdio/printf_core/string_converter.h b/libc/src/stdio/printf_core/string_converter.h
index 74c9f598210f7..2f521544785c0 100644
--- a/libc/src/stdio/printf_core/string_converter.h
+++ b/libc/src/stdio/printf_core/string_converter.h
@@ -9,11 +9,18 @@
 #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
 
-#include "src/__support/CPP/string_view.h"
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+#include "hdr/types/char32_t.h"
+#include "hdr/types/char8_t.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/string_converter.h"
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+
 #include "src/__support/macros/config.h"
 #include "src/stdio/printf_core/converter_utils.h"
 #include "src/stdio/printf_core/core_structs.h"
 #include "src/stdio/printf_core/writer.h"
+#include "src/string/string_utils.h" // string_length
 
 #include <stddef.h>
 
@@ -21,10 +28,10 @@ namespace LIBC_NAMESPACE_DECL {
 namespace printf_core {
 
 template <WriteMode write_mode>
-LIBC_INLINE int convert_string(Writer<write_mode> *writer,
-                               const FormatSection &to_conv) {
-  size_t string_len = 0;
+LIBC_INLINE int char_writer(Writer<write_mode> *writer,
+                            const FormatSection &to_conv) {
   const char *str_ptr = reinterpret_cast<const char *>(to_conv.conv_val_ptr);
+  size_t string_len = 0;
 
 #ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
   if (str_ptr == nullptr) {
@@ -32,9 +39,7 @@ LIBC_INLINE int convert_string(Writer<write_mode> *writer,
   }
 #endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
 
-  for (const char *cur_str = (str_ptr); cur_str[string_len]; ++string_len) {
-    ;
-  }
+  string_len = internal::string_length(str_ptr);
 
   if (to_conv.precision >= 0 &&
       static_cast<size_t>(to_conv.precision) < string_len)
@@ -60,6 +65,80 @@ LIBC_INLINE int convert_string(Writer<write_mode> *writer,
   return WRITE_OK;
 }
 
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+template <WriteMode write_mode> // Used by convert_string for the l modifier.
+LIBC_INLINE int wchar_writer(Writer<write_mode> *writer,
+                             const FormatSection &to_conv) {
+  size_t string_len = 0; // Number of char8_t units counted in pass 1.
+  const char32_t *wstr_ptr =
+      reinterpret_cast<const char32_t *>(to_conv.conv_val_ptr);
+  size_t precision =
+      to_conv.precision < 0 ? SIZE_MAX : static_cast<size_t>(to_conv.precision);
+  // A negative precision becomes SIZE_MAX, the limit passed to both converters.
+#ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+  if (wstr_ptr == nullptr) {
+    wstr_ptr = U"(null)"; // Substitute text for a null argument.
+  }
+#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+  // One conversion state serves both passes; it is reset before pass 2.
+  internal::mbstate mbstate;
+  // Pass 1: pop char8_t units only to count them; nothing is written yet.
+  internal::StringConverter<char32_t> length_counter(wstr_ptr, &mbstate,
+                                                     precision);
+  // NOTE(review): a failed pop only ends the loop; is dropping the error OK?
+  for (auto converted = length_counter.pop<char8_t>();
+       converted.has_value() && converted.value() != '\0';
+       converted = length_counter.pop<char8_t>()) {
+    ++string_len;
+  }
+  // Padding is needed only when min_width exceeds the counted length.
+  size_t padding_spaces = to_conv.min_width > static_cast<int>(string_len)
+                              ? to_conv.min_width - string_len
+                              : 0;
+
+  // If the padding is on the left side, write the spaces first.
+  if (padding_spaces > 0 &&
+      (to_conv.flags & FormatFlags::LEFT_JUSTIFIED) == 0) {
+    RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces));
+  }
+  // Pass 2: repeat the conversion from a fresh state and write each unit.
+  mbstate = internal::mbstate();
+  internal::StringConverter<char32_t> out_conv(wstr_ptr, &mbstate, precision);
+
+  for (auto converted = out_conv.pop<char8_t>();
+       converted.has_value() && converted.value() != '\0';
+       converted = out_conv.pop<char8_t>()) {
+    RET_IF_RESULT_NEGATIVE(writer->write(static_cast<char>(converted.value())));
+  }
+
+  // If the padding is on the right side, write the spaces last.
+  if (padding_spaces > 0 &&
+      (to_conv.flags & FormatFlags::LEFT_JUSTIFIED) != 0) {
+    RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces));
+  }
+
+  return WRITE_OK;
+}
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+
+template <WriteMode write_mode>
+LIBC_INLINE int convert_string(Writer<write_mode> *writer,
+                               const FormatSection &to_conv) {
+  // Dispatch on the length modifier. An l modifier marks the argument as a
+  // wide string, which goes to wchar_writer when wide support is compiled in.
+  // Everything else is written by char_writer, and so is an l-modified
+  // argument in builds that define LIBC_COPT_PRINTF_DISABLE_WIDE.
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+  const bool is_wide = to_conv.length_modifier == LengthModifier::l;
+  if (is_wide)
+    return wchar_writer(writer, to_conv);
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+
+  // Narrow path: the argument is treated as a char string.
+  const int narrow_result = char_writer(writer, to_conv);
+  return narrow_result;
+}
+
 } // namespace printf_core
 } // namespace LIBC_NAMESPACE_DECL
 
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 78186abb9966e..8c09594fddf79 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -3582,4 +3582,107 @@ TEST(LlvmLibcSprintfTest, WideCharConversion) {
             -1);
   ASSERT_ERRNO_EQ(EILSEQ);
 }
+
+TEST(LlvmLibcSprintfTest, WideCharStringConversion) {
+  char buff[64];
+  int written;
+  // Widths and precisions of %ls are measured in output bytes, not wide chars.
+  // Basic test.
+  written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"Hello, World!");
+  EXPECT_EQ(written, 13);
+  ASSERT_STREQ_LEN(written, buff, "Hello, World!");
+
+  // Left justified, with a width wide enough that padding goes on the right.
+  written = LIBC_NAMESPACE::sprintf(buff, "%-15ls", L"Hello, World!");
+  EXPECT_EQ(written, 15);
+  ASSERT_STREQ_LEN(written, buff, "Hello, World!  ");
+
+  // Precision test.
+  written = LIBC_NAMESPACE::sprintf(buff, "%.5ls", L"Hello, World!");
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ_LEN(written, buff, "Hello");
+
+  // Test ¢, which is a 2-byte multibyte character.
+  written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"¢");
+  EXPECT_EQ(written, 2);
+  ASSERT_STREQ_LEN(written, buff, "¢");
+
+  // Test €, which is a 3-byte multibyte character.
+  written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"€");
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ_LEN(written, buff, "€");
+
+  // Test 😀, which is a 4-byte multibyte character.
+  written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"😀");
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "😀");
+
+  // Test string with multiple multibyte characters.
+  written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"¢€😀");
+  EXPECT_EQ(written, 9);
+  ASSERT_STREQ_LEN(written, buff, "¢€😀");
+
+  // Width with multibyte characters.
+  written = LIBC_NAMESPACE::sprintf(buff, "%2ls", L"¢€😀");
+  EXPECT_EQ(written, 9);
+  ASSERT_STREQ_LEN(written, buff, "¢€😀");
+
+  // Width with multibyte characters and padding.
+  written = LIBC_NAMESPACE::sprintf(buff, "%12ls", L"¢€😀");
+  EXPECT_EQ(written, 12);
+  ASSERT_STREQ_LEN(written, buff, "   ¢€😀");
+
+  // Precision with multibyte characters.
+  written = LIBC_NAMESPACE::sprintf(buff, "%.4ls", L"¢€😀");
+  EXPECT_EQ(written, 2); // Only ¢ fits in 4 bytes.
+  ASSERT_STREQ_LEN(written, buff, "¢");
+
+  // Precision with padding and multibyte characters.
+  written = LIBC_NAMESPACE::sprintf(buff, "%5.4ls", L"¢€😀");
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ_LEN(written, buff, "   ¢");
+
+  // Precision with left justification and multibyte characters.
+  written = LIBC_NAMESPACE::sprintf(buff, "%-5.4ls", L"¢€😀");
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ_LEN(written, buff, "¢   ");
+
+  // A precision too short for the first multibyte character writes nothing.
+  written = LIBC_NAMESPACE::sprintf(buff, "%.1ls", L"¢€😀");
+  EXPECT_EQ(written, 0);
+  ASSERT_STREQ_LEN(written, buff, "");
+
+  // Longer precision and shorter width.
+  written = LIBC_NAMESPACE::sprintf(buff, "%3.5ls", L"¢€😀");
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ_LEN(written, buff, "¢€");
+
+  // Reverse the wide string and test with width and precision.
+  written = LIBC_NAMESPACE::sprintf(buff, "%4.4ls", L"😀€¢");
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "😀");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%5.4ls", L"😀€¢");
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ_LEN(written, buff, " 😀");
+
+  written = LIBC_NAMESPACE::sprintf(buff, "%.3ls", L"😀€¢");
+  EXPECT_EQ(written, 0);
+  ASSERT_STREQ_LEN(written, buff, "");
+
+  // NULL string test.
+  written = LIBC_NAMESPACE::sprintf(buff, "%ls", nullptr);
+  EXPECT_EQ(written, 6);
+  ASSERT_STREQ_LEN(written, buff, "(null)");
+
+  // NULL string with precision.
+  written = LIBC_NAMESPACE::sprintf(buff, "%.3ls", nullptr);
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ_LEN(written, buff, "(nu");
+
+  // NULL string with precision and width.
+  written = LIBC_NAMESPACE::sprintf(buff, "%6.3ls", nullptr);
+  EXPECT_EQ(written, 6);
+  ASSERT_STREQ_LEN(written, buff, "   (nu");
+}
 #endif // LIBC_COPT_PRINTF_DISABLE_WIDE



More information about the libc-commits mailing list