[libc-commits] [libc] [libc] Support ls in printf (PR #178841)
Shubh Pachchigar via libc-commits
libc-commits at lists.llvm.org
Sat Jan 31 02:40:56 PST 2026
https://github.com/shubhe25p updated https://github.com/llvm/llvm-project/pull/178841
>From 7219fa1d451ef070db74a4334ab75d4a049f88e3 Mon Sep 17 00:00:00 2001
From: shubhe25p <shubhp at mbm3a24.local>
Date: Sat, 31 Jan 2026 02:40:10 -0800
Subject: [PATCH] [libc] Support %ls in printf
Add support for %ls in printf by calling the internal
wcrtomb function, and add a relevant end-to-end sprintf
test. Additionally, modify the printf parser to
recognize the length modifier. This also disables
wide string support on the Windows platform.
---
libc/src/stdio/printf_core/CMakeLists.txt | 1 +
libc/src/stdio/printf_core/parser.h | 3 +-
libc/src/stdio/printf_core/string_converter.h | 94 +++++++++++++++++--
libc/test/src/stdio/sprintf_test.cpp | 90 ++++++++++++++++++
4 files changed, 180 insertions(+), 8 deletions(-)
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index ae93cc754299b..54e5051a838bc 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -56,6 +56,7 @@ else()
)
set(parser_wchar_deps
libc.hdr.types.wint_t
+ libc.hdr.types.wchar_t
)
endif()
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index a3b62991bcec9..47c7b7fa6b04b 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -28,6 +28,7 @@
#include "src/__support/libc_errno.h"
#endif // LIBC_COPT_PRINTF_DISABLE_STRERROR
#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+#include "hdr/types/wchar_t.h"
#include "hdr/types/wint_t.h"
#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
@@ -290,7 +291,7 @@ template <typename ArgProvider> class Parser {
WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index);
break;
case ('s'):
- WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, char *, conv_index);
+ WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index);
break;
default:
// if the conversion is undefined, change this to a raw section.
diff --git a/libc/src/stdio/printf_core/string_converter.h b/libc/src/stdio/printf_core/string_converter.h
index 74c9f598210f7..152dd6ff6d830 100644
--- a/libc/src/stdio/printf_core/string_converter.h
+++ b/libc/src/stdio/printf_core/string_converter.h
@@ -9,6 +9,14 @@
#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+#include "hdr/types/wchar_t.h"
+#include "hdr/types/wint_t.h"
+#include "hdr/wchar_macros.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcrtomb.h"
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+
#include "src/__support/CPP/string_view.h"
#include "src/__support/macros/config.h"
#include "src/stdio/printf_core/converter_utils.h"
@@ -32,13 +40,58 @@ LIBC_INLINE int convert_string(Writer<write_mode> *writer,
}
#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
- for (const char *cur_str = (str_ptr); cur_str[string_len]; ++string_len) {
- ;
- }
+ if (to_conv.length_modifier == LengthModifier::l) {
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+ const wchar_t *wstr_ptr =
+ reinterpret_cast<const wchar_t *>(to_conv.conv_val_ptr);
+
+#ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+ if (wstr_ptr == nullptr) {
+ wstr_ptr = L"(null)";
+ }
+#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+
+ char buffer[MB_LEN_MAX];
+ internal::mbstate mbstate;
+ size_t written = 0;
+ for (const wchar_t *cur_str = (wstr_ptr); cur_str[string_len];
+ ++string_len) {
+ wchar_t wc = cur_str[string_len];
+
+ auto ret = internal::wcrtomb(buffer, wc, &mbstate);
+ if (!ret.has_value()) {
+ return MB_CONVERSION_ERROR;
+ }
+ written += ret.value();
+
+ if (to_conv.precision >= 0 &&
+ static_cast<size_t>(to_conv.precision) < written) {
+ written -= ret.value();
+ break;
+ }
+ }
+ string_len = written;
+#else
+ for (const char *cur_str = (str_ptr); cur_str[string_len]; ++string_len) {
+ ;
+ }
+
+ if (to_conv.precision >= 0 &&
+ static_cast<size_t>(to_conv.precision) < string_len) {
+ string_len = to_conv.precision;
+ }
- if (to_conv.precision >= 0 &&
- static_cast<size_t>(to_conv.precision) < string_len)
- string_len = to_conv.precision;
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+ } else {
+
+ for (const char *cur_str = (str_ptr); cur_str[string_len]; ++string_len) {
+ ;
+ }
+ if (to_conv.precision >= 0 &&
+ static_cast<size_t>(to_conv.precision) < string_len) {
+ string_len = to_conv.precision;
+ }
+ }
size_t padding_spaces = to_conv.min_width > static_cast<int>(string_len)
? to_conv.min_width - string_len
@@ -50,7 +103,34 @@ LIBC_INLINE int convert_string(Writer<write_mode> *writer,
RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces));
}
- RET_IF_RESULT_NEGATIVE(writer->write({(str_ptr), string_len}));
+ if (to_conv.length_modifier == LengthModifier::l) {
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+ const wchar_t *wstr_ptr =
+ reinterpret_cast<const wchar_t *>(to_conv.conv_val_ptr);
+
+#ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+ if (wstr_ptr == nullptr) {
+ wstr_ptr = L"(null)";
+ }
+#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+
+ size_t written = 0;
+ char buffer[MB_LEN_MAX];
+ internal::mbstate mbstate;
+ for (size_t i = 0; written < string_len; ++i) {
+ // We don't need to check errors/precision here; Pass 1 guaranteed safety.
+ auto ret = internal::wcrtomb(buffer, wstr_ptr[i], &mbstate);
+ size_t mb_len = ret.value();
+
+ RET_IF_RESULT_NEGATIVE(writer->write({buffer, mb_len}));
+ written += mb_len;
+ }
+#else
+ RET_IF_RESULT_NEGATIVE(writer->write({(str_ptr), string_len}));
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+ } else {
+ RET_IF_RESULT_NEGATIVE(writer->write({(str_ptr), string_len}));
+ }
// If the padding is on the right side, write the spaces last.
if (padding_spaces > 0 &&
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 78186abb9966e..b7ddf2fda3a4f 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -3582,4 +3582,94 @@ TEST(LlvmLibcSprintfTest, WideCharConversion) {
-1);
ASSERT_ERRNO_EQ(EILSEQ);
}
+
+TEST(LlvmLibcSprintfTest, WideCharStringConversion) {
+ char buff[64];
+ int written;
+
+ // Basic test: plain ASCII wide string with no flags, width, or precision.
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"Hello, World!");
+ EXPECT_EQ(written, 13);
+ ASSERT_STREQ_LEN(written, buff, "Hello, World!");
+
+ // Left justified; width 10 is below the 13-byte output, so no padding.
+ written = LIBC_NAMESPACE::sprintf(buff, "%-10ls", L"Hello, World!");
+ EXPECT_EQ(written, 13);
+ ASSERT_STREQ_LEN(written, buff, "Hello, World!");
+
+ // Precision test: precision 5 truncates the output to 5 bytes.
+ written = LIBC_NAMESPACE::sprintf(buff, "%.5ls", L"Hello, World!");
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ_LEN(written, buff, "Hello");
+
+ // Test ¢, which is a 2-byte multibyte character.
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"¢");
+ EXPECT_EQ(written, 2);
+ ASSERT_STREQ_LEN(written, buff, "¢");
+
+ // Test €, which is a 3-byte multibyte character.
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"€");
+ EXPECT_EQ(written, 3);
+ ASSERT_STREQ_LEN(written, buff, "€");
+
+ // Test 😀, which is a 4-byte multibyte character.
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"😀");
+ EXPECT_EQ(written, 4);
+ ASSERT_STREQ_LEN(written, buff, "😀");
+
+ // Test a string with multiple multibyte characters (2 + 3 + 4 = 9 bytes).
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"¢€😀");
+ EXPECT_EQ(written, 9);
+ ASSERT_STREQ_LEN(written, buff, "¢€😀");
+
+ // Width (2) smaller than the 9-byte output: no padding is added.
+ written = LIBC_NAMESPACE::sprintf(buff, "%2ls", L"¢€😀");
+ EXPECT_EQ(written, 9);
+ ASSERT_STREQ_LEN(written, buff, "¢€😀");
+
+ // Width (12) larger than the 9-byte output: result is padded to 12 bytes.
+ written = LIBC_NAMESPACE::sprintf(buff, "%12ls", L"¢€😀");
+ EXPECT_EQ(written, 12);
+ ASSERT_STREQ_LEN(written, buff, " ¢€😀");
+
+ // Precision is counted in bytes, and only whole characters are emitted.
+ written = LIBC_NAMESPACE::sprintf(buff, "%.4ls", L"¢€😀");
+ EXPECT_EQ(written, 2); // Only ¢ fits in 4 bytes.
+ ASSERT_STREQ_LEN(written, buff, "¢");
+
+ // Precision with padding: the 2-byte result is padded out to width 5.
+ written = LIBC_NAMESPACE::sprintf(buff, "%5.4ls", L"¢€😀");
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ_LEN(written, buff, " ¢");
+
+ // Same as above, but left justified so the padding follows the text.
+ written = LIBC_NAMESPACE::sprintf(buff, "%-5.4ls", L"¢€😀");
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ_LEN(written, buff, "¢ ");
+
+ // Precision too short for the first multibyte character: nothing written.
+ written = LIBC_NAMESPACE::sprintf(buff, "%.1ls", L"¢€😀");
+ EXPECT_EQ(written, 0);
+ ASSERT_STREQ_LEN(written, buff, "");
+
+ // Precision (5) longer than width (3): ¢€ fills 5 bytes, no padding needed.
+ written = LIBC_NAMESPACE::sprintf(buff, "%3.5ls", L"¢€😀");
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ_LEN(written, buff, "¢€");
+
+ // NULL string test: a null pointer is printed as "(null)".
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", nullptr);
+ EXPECT_EQ(written, 6);
+ ASSERT_STREQ_LEN(written, buff, "(null)");
+
+ // NULL string with precision: "(null)" is truncated like any other string.
+ written = LIBC_NAMESPACE::sprintf(buff, "%.3ls", nullptr);
+ EXPECT_EQ(written, 3);
+ ASSERT_STREQ_LEN(written, buff, "(nu");
+
+ // NULL string with precision and width: truncated, then padded to width 6.
+ written = LIBC_NAMESPACE::sprintf(buff, "%6.3ls", nullptr);
+ EXPECT_EQ(written, 6);
+ ASSERT_STREQ_LEN(written, buff, " (nu");
+}
#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
More information about the libc-commits
mailing list