[libc-commits] [libc] [libc] Support ls in printf (PR #178841)

via libc-commits libc-commits at lists.llvm.org
Sat Jan 31 02:45:08 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libc

Author: Shubh Pachchigar (shubhe25p)

<details>
<summary>Changes</summary>

Add support for `%ls` in printf by calling the internal `wcrtomb` function, and add a corresponding end-to-end sprintf test. Additionally, modify the printf parser to recognize the length modifier. This PR disables wide string support on the Windows platform.

---
Full diff: https://github.com/llvm/llvm-project/pull/178841.diff


4 Files Affected:

- (modified) libc/src/stdio/printf_core/CMakeLists.txt (+1) 
- (modified) libc/src/stdio/printf_core/parser.h (+2-1) 
- (modified) libc/src/stdio/printf_core/string_converter.h (+87-7) 
- (modified) libc/test/src/stdio/sprintf_test.cpp (+90) 


``````````diff
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index ae93cc754299b..54e5051a838bc 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -56,6 +56,7 @@ else()
   )
   set(parser_wchar_deps
     libc.hdr.types.wint_t
+    libc.hdr.types.wchar_t
   )
 endif()
 
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index a3b62991bcec9..47c7b7fa6b04b 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -28,6 +28,7 @@
 #include "src/__support/libc_errno.h"
 #endif // LIBC_COPT_PRINTF_DISABLE_STRERROR
 #ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+#include "hdr/types/wchar_t.h"
 #include "hdr/types/wint_t.h"
 #endif // LIBC_COPT_PRINTF_DISABLE_WIDE
 
@@ -290,7 +291,7 @@ template <typename ArgProvider> class Parser {
         WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index);
         break;
       case ('s'):
-        WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, char *, conv_index);
+        WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index);
         break;
       default:
         // if the conversion is undefined, change this to a raw section.
diff --git a/libc/src/stdio/printf_core/string_converter.h b/libc/src/stdio/printf_core/string_converter.h
index 74c9f598210f7..152dd6ff6d830 100644
--- a/libc/src/stdio/printf_core/string_converter.h
+++ b/libc/src/stdio/printf_core/string_converter.h
@@ -9,6 +9,14 @@
 #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
 
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+#include "hdr/types/wchar_t.h"
+#include "hdr/types/wint_t.h"
+#include "hdr/wchar_macros.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcrtomb.h"
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+
 #include "src/__support/CPP/string_view.h"
 #include "src/__support/macros/config.h"
 #include "src/stdio/printf_core/converter_utils.h"
@@ -32,13 +40,58 @@ LIBC_INLINE int convert_string(Writer<write_mode> *writer,
   }
 #endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
 
-  for (const char *cur_str = (str_ptr); cur_str[string_len]; ++string_len) {
-    ;
-  }
+  if (to_conv.length_modifier == LengthModifier::l) {
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+    const wchar_t *wstr_ptr =
+        reinterpret_cast<const wchar_t *>(to_conv.conv_val_ptr);
+
+#ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+    if (wstr_ptr == nullptr) {
+      wstr_ptr = L"(null)";
+    }
+#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+
+    char buffer[MB_LEN_MAX];
+    internal::mbstate mbstate;
+    size_t written = 0;
+    for (const wchar_t *cur_str = (wstr_ptr); cur_str[string_len];
+         ++string_len) {
+      wchar_t wc = cur_str[string_len];
+
+      auto ret = internal::wcrtomb(buffer, wc, &mbstate);
+      if (!ret.has_value()) {
+        return MB_CONVERSION_ERROR;
+      }
+      written += ret.value();
+
+      if (to_conv.precision >= 0 &&
+          static_cast<size_t>(to_conv.precision) < written) {
+        written -= ret.value();
+        break;
+      }
+    }
+    string_len = written;
+#else
+    for (const char *cur_str = (str_ptr); cur_str[string_len]; ++string_len) {
+      ;
+    }
+
+    if (to_conv.precision >= 0 &&
+        static_cast<size_t>(to_conv.precision) < string_len) {
+      string_len = to_conv.precision;
+    }
 
-  if (to_conv.precision >= 0 &&
-      static_cast<size_t>(to_conv.precision) < string_len)
-    string_len = to_conv.precision;
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+  } else {
+
+    for (const char *cur_str = (str_ptr); cur_str[string_len]; ++string_len) {
+      ;
+    }
+    if (to_conv.precision >= 0 &&
+        static_cast<size_t>(to_conv.precision) < string_len) {
+      string_len = to_conv.precision;
+    }
+  }
 
   size_t padding_spaces = to_conv.min_width > static_cast<int>(string_len)
                               ? to_conv.min_width - string_len
@@ -50,7 +103,34 @@ LIBC_INLINE int convert_string(Writer<write_mode> *writer,
     RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces));
   }
 
-  RET_IF_RESULT_NEGATIVE(writer->write({(str_ptr), string_len}));
+  if (to_conv.length_modifier == LengthModifier::l) {
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+    const wchar_t *wstr_ptr =
+        reinterpret_cast<const wchar_t *>(to_conv.conv_val_ptr);
+
+#ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+    if (wstr_ptr == nullptr) {
+      wstr_ptr = L"(null)";
+    }
+#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+
+    size_t written = 0;
+    char buffer[MB_LEN_MAX];
+    internal::mbstate mbstate;
+    for (size_t i = 0; written < string_len; ++i) {
+      // We don't need to check errors/precision here; Pass 1 guaranteed safety.
+      auto ret = internal::wcrtomb(buffer, wstr_ptr[i], &mbstate);
+      size_t mb_len = ret.value();
+
+      RET_IF_RESULT_NEGATIVE(writer->write({buffer, mb_len}));
+      written += mb_len;
+    }
+#else
+    RET_IF_RESULT_NEGATIVE(writer->write({(str_ptr), string_len}));
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+  } else {
+    RET_IF_RESULT_NEGATIVE(writer->write({(str_ptr), string_len}));
+  }
 
   // If the padding is on the right side, write the spaces last.
   if (padding_spaces > 0 &&
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 78186abb9966e..b7ddf2fda3a4f 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -3582,4 +3582,94 @@ TEST(LlvmLibcSprintfTest, WideCharConversion) {
             -1);
   ASSERT_ERRNO_EQ(EILSEQ);
 }
+
+TEST(LlvmLibcSprintfTest, WideCharStringConversion) {
+  char buff[64];
+  int written;
+
+  // Basic test.
+  written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"Hello, World!");
+  EXPECT_EQ(written, 13);
+  ASSERT_STREQ_LEN(written, buff, "Hello, World!");
+
+  // Left justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%-10ls", L"Hello, World!");
+  EXPECT_EQ(written, 13);
+  ASSERT_STREQ_LEN(written, buff, "Hello, World!");
+
+  // Precision test.
+  written = LIBC_NAMESPACE::sprintf(buff, "%.5ls", L"Hello, World!");
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ_LEN(written, buff, "Hello");
+
+  // Test ¢ which is 2 bytes multibyte character.
+  written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"¢");
+  EXPECT_EQ(written, 2);
+  ASSERT_STREQ_LEN(written, buff, "¢");
+
+  // Test € which is 3 bytes multibyte character.
+  written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"€");
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ_LEN(written, buff, "€");
+
+  // Test 😀 which is 4 bytes multibyte character
+  written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"😀");
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "😀");
+
+  // Test string with multiple multibyte characters.
+  written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"¢€😀");
+  EXPECT_EQ(written, 9);
+  ASSERT_STREQ_LEN(written, buff, "¢€😀");
+
+  // Width with multibyte characters.
+  written = LIBC_NAMESPACE::sprintf(buff, "%2ls", L"¢€😀");
+  EXPECT_EQ(written, 9);
+  ASSERT_STREQ_LEN(written, buff, "¢€😀");
+
+  // Width with multibyte characters and padding.
+  written = LIBC_NAMESPACE::sprintf(buff, "%12ls", L"¢€😀");
+  EXPECT_EQ(written, 12);
+  ASSERT_STREQ_LEN(written, buff, "   ¢€😀");
+
+  // Precision with multibyte characters.
+  written = LIBC_NAMESPACE::sprintf(buff, "%.4ls", L"¢€😀");
+  EXPECT_EQ(written, 2); // Only ¢ fits in 4 bytes.
+  ASSERT_STREQ_LEN(written, buff, "¢");
+
+  // Precision with padding and multibyte characters.
+  written = LIBC_NAMESPACE::sprintf(buff, "%5.4ls", L"¢€😀");
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ_LEN(written, buff, "   ¢");
+
+  // Precision with left justification and multibyte characters.
+  written = LIBC_NAMESPACE::sprintf(buff, "%-5.4ls", L"¢€😀");
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ_LEN(written, buff, "¢   ");
+
+  // Short precision that cuts a multibyte character.
+  written = LIBC_NAMESPACE::sprintf(buff, "%.1ls", L"¢€😀");
+  EXPECT_EQ(written, 0);
+  ASSERT_STREQ_LEN(written, buff, "");
+
+  // Longer precision and shorter width.
+  written = LIBC_NAMESPACE::sprintf(buff, "%3.5ls", L"¢€😀");
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ_LEN(written, buff, "¢€");
+
+  // NULL string test.
+  written = LIBC_NAMESPACE::sprintf(buff, "%ls", nullptr);
+  EXPECT_EQ(written, 6);
+  ASSERT_STREQ_LEN(written, buff, "(null)");
+
+  // NULL string with precision.
+  written = LIBC_NAMESPACE::sprintf(buff, "%.3ls", nullptr);
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ_LEN(written, buff, "(nu");
+
+  // NULL string with precision and width.
+  written = LIBC_NAMESPACE::sprintf(buff, "%6.3ls", nullptr);
+  EXPECT_EQ(written, 6);
+  ASSERT_STREQ_LEN(written, buff, "   (nu");
+}
 #endif // LIBC_COPT_PRINTF_DISABLE_WIDE

``````````

</details>


https://github.com/llvm/llvm-project/pull/178841


More information about the libc-commits mailing list