[libc-commits] [libc] [libc] Support ls in printf (PR #178841)
Shubh Pachchigar via libc-commits
libc-commits at lists.llvm.org
Thu Feb 26 22:37:17 PST 2026
https://github.com/shubhe25p updated https://github.com/llvm/llvm-project/pull/178841
>From 10171584b2a1f8bc6b9bbb1da9e5981fff82ed45 Mon Sep 17 00:00:00 2001
From: shubhe25p <shubhp at mbm3a24.local>
Date: Thu, 26 Feb 2026 22:35:21 -0800
Subject: [PATCH] [libc] Support %ls in printf
Add support for %ls in printf by calling the internal
string converter, and add a corresponding end-to-end
sprintf test. Additionally, modify the printf
parser to recognize the length modifier. This
also disables wide string support on Windows
and other unsupported platforms.
---
libc/src/stdio/printf_core/CMakeLists.txt | 5 +
libc/src/stdio/printf_core/parser.h | 2 +-
libc/src/stdio/printf_core/string_converter.h | 93 ++++++++++++++--
libc/test/src/stdio/sprintf_test.cpp | 103 ++++++++++++++++++
4 files changed, 195 insertions(+), 8 deletions(-)
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index ae93cc754299b..2009e564da1df 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -49,10 +49,13 @@ if(LIBC_CONF_PRINTF_DISABLE_WIDE)
else()
set(wchar_deps
libc.hdr.types.wchar_t
+ libc.hdr.types.char32_t
+ libc.hdr.types.char8_t
libc.hdr.types.wint_t
libc.hdr.wchar_macros
libc.src.__support.wchar.wcrtomb
libc.src.__support.wchar.mbstate
+ libc.src.__support.wchar.string_converter
)
set(parser_wchar_deps
libc.hdr.types.wint_t
@@ -129,8 +132,10 @@ add_header_library(
.writer
libc.include.inttypes
libc.hdr.limits_macros
+ libc.src.string.string_utils
libc.src.__support.big_int
libc.src.__support.common
+ libc.src.__support.CPP.algorithm
libc.src.__support.CPP.limits
libc.src.__support.CPP.span
libc.src.__support.CPP.string_view
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index a3b62991bcec9..7762aa1d87fce 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -290,7 +290,7 @@ template <typename ArgProvider> class Parser {
WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index);
break;
case ('s'):
- WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, char *, conv_index);
+ WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index);
break;
default:
// if the conversion is undefined, change this to a raw section.
diff --git a/libc/src/stdio/printf_core/string_converter.h b/libc/src/stdio/printf_core/string_converter.h
index 74c9f598210f7..2f521544785c0 100644
--- a/libc/src/stdio/printf_core/string_converter.h
+++ b/libc/src/stdio/printf_core/string_converter.h
@@ -9,11 +9,18 @@
#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
-#include "src/__support/CPP/string_view.h"
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+#include "hdr/types/char32_t.h"
+#include "hdr/types/char8_t.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/string_converter.h"
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+
#include "src/__support/macros/config.h"
#include "src/stdio/printf_core/converter_utils.h"
#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/writer.h"
+#include "src/string/string_utils.h" // string_length
#include <stddef.h>
@@ -21,10 +28,10 @@ namespace LIBC_NAMESPACE_DECL {
namespace printf_core {
template <WriteMode write_mode>
-LIBC_INLINE int convert_string(Writer<write_mode> *writer,
- const FormatSection &to_conv) {
- size_t string_len = 0;
+LIBC_INLINE int char_writer(Writer<write_mode> *writer,
+ const FormatSection &to_conv) {
const char *str_ptr = reinterpret_cast<const char *>(to_conv.conv_val_ptr);
+ size_t string_len = 0;
#ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
if (str_ptr == nullptr) {
@@ -32,9 +39,7 @@ LIBC_INLINE int convert_string(Writer<write_mode> *writer,
}
#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
- for (const char *cur_str = (str_ptr); cur_str[string_len]; ++string_len) {
- ;
- }
+ string_len = internal::string_length(str_ptr);
if (to_conv.precision >= 0 &&
static_cast<size_t>(to_conv.precision) < string_len)
@@ -60,6 +65,80 @@ LIBC_INLINE int convert_string(Writer<write_mode> *writer,
return WRITE_OK;
}
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+template <WriteMode write_mode> // Writes a %ls argument as char8_t units.
+LIBC_INLINE int wchar_writer(Writer<write_mode> *writer,
+ const FormatSection &to_conv) {
+ size_t string_len = 0; // count of char8_t units produced by the first pass
+ const char32_t *wstr_ptr = // assumes 32-bit wide string elements — TODO confirm
+ reinterpret_cast<const char32_t *>(to_conv.conv_val_ptr);
+ size_t precision = // a negative precision is mapped to SIZE_MAX
+ to_conv.precision < 0 ? SIZE_MAX : static_cast<size_t>(to_conv.precision);
+
+#ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+ if (wstr_ptr == nullptr) {
+ wstr_ptr = U"(null)"; // a null argument is replaced by this literal
+ }
+#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+
+ internal::mbstate mbstate; // conversion state shared with the converters
+
+ internal::StringConverter<char32_t> length_counter(wstr_ptr, &mbstate,
+ precision); // presumably caps the output units — confirm
+
+ for (auto converted = length_counter.pop<char8_t>(); // pass 1: count only
+ converted.has_value() && converted.value() != '\0';
+ converted = length_counter.pop<char8_t>()) {
+ ++string_len;
+ }
+
+ size_t padding_spaces = to_conv.min_width > static_cast<int>(string_len)
+ ? to_conv.min_width - string_len
+ : 0; // no padding when the output already fills min_width
+
+ // Without LEFT_JUSTIFIED the padding goes before the string.
+ if (padding_spaces > 0 &&
+ (to_conv.flags & FormatFlags::LEFT_JUSTIFIED) == 0) {
+ RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces));
+ }
+
+ mbstate = internal::mbstate(); // start pass 2 from a fresh state
+ internal::StringConverter<char32_t> out_conv(wstr_ptr, &mbstate, precision);
+
+ for (auto converted = out_conv.pop<char8_t>(); // pass 2: emit the units
+ converted.has_value() && converted.value() != '\0';
+ converted = out_conv.pop<char8_t>()) { // NOTE(review): a failed pop just ends the loop; confirm errors need no reporting
+ RET_IF_RESULT_NEGATIVE(writer->write(static_cast<char>(converted.value())));
+ }
+
+ // With LEFT_JUSTIFIED the padding goes after the string.
+ if (padding_spaces > 0 &&
+ (to_conv.flags & FormatFlags::LEFT_JUSTIFIED) != 0) {
+ RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces));
+ }
+
+ return WRITE_OK; // reached when no writer->write call reported a failure
+}
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+
+// Dispatches a %s conversion to the narrow or the wide string writer.
+// The `l` length modifier selects wchar_writer; if wide support is compiled
+// out (LIBC_COPT_PRINTF_DISABLE_WIDE), char_writer handles every case.
+template <WriteMode write_mode>
+LIBC_INLINE int convert_string(Writer<write_mode> *writer,
+ const FormatSection &to_conv) {
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+ const bool is_wide = to_conv.length_modifier == LengthModifier::l;
+ if (is_wide) {
+ // %ls: the argument is a wide string.
+ return wchar_writer(writer, to_conv);
+ }
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+
+ // Plain %s, or %ls when wide support is disabled.
+ return char_writer(writer, to_conv);
+}
+
} // namespace printf_core
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 78186abb9966e..8c09594fddf79 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -3582,4 +3582,107 @@ TEST(LlvmLibcSprintfTest, WideCharConversion) {
-1);
ASSERT_ERRNO_EQ(EILSEQ);
}
+
+TEST(LlvmLibcSprintfTest, WideCharStringConversion) {
+ char buff[64];
+ int written;
+
+ // End-to-end checks for %ls, starting with an ASCII-only wide string.
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"Hello, World!");
+ EXPECT_EQ(written, 13);
+ ASSERT_STREQ_LEN(written, buff, "Hello, World!");
+
+ // Left justified; the width is shorter than the string, so no padding.
+ written = LIBC_NAMESPACE::sprintf(buff, "%-10ls", L"Hello, World!");
+ EXPECT_EQ(written, 13);
+ ASSERT_STREQ_LEN(written, buff, "Hello, World!");
+
+ // Precision truncates the output.
+ written = LIBC_NAMESPACE::sprintf(buff, "%.5ls", L"Hello, World!");
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ_LEN(written, buff, "Hello");
+
+ // Test ¢, which is a 2-byte multibyte character.
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"¢");
+ EXPECT_EQ(written, 2);
+ ASSERT_STREQ_LEN(written, buff, "¢");
+
+ // Test €, which is a 3-byte multibyte character.
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"€");
+ EXPECT_EQ(written, 3);
+ ASSERT_STREQ_LEN(written, buff, "€");
+
+ // Test 😀, which is a 4-byte multibyte character.
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"😀");
+ EXPECT_EQ(written, 4);
+ ASSERT_STREQ_LEN(written, buff, "😀");
+
+ // Test a string with multiple multibyte characters.
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"¢€😀");
+ EXPECT_EQ(written, 9);
+ ASSERT_STREQ_LEN(written, buff, "¢€😀");
+
+ // Width smaller than the converted length: no padding.
+ written = LIBC_NAMESPACE::sprintf(buff, "%2ls", L"¢€😀");
+ EXPECT_EQ(written, 9);
+ ASSERT_STREQ_LEN(written, buff, "¢€😀");
+
+ // Width 12 with 9 bytes of output: three leading pad spaces.
+ written = LIBC_NAMESPACE::sprintf(buff, "%12ls", L"¢€😀");
+ EXPECT_EQ(written, 12);
+ ASSERT_STREQ_LEN(written, buff, "   ¢€😀");
+
+ // Precision with multibyte characters.
+ written = LIBC_NAMESPACE::sprintf(buff, "%.4ls", L"¢€😀");
+ EXPECT_EQ(written, 2); // Only ¢ fits in 4 bytes.
+ ASSERT_STREQ_LEN(written, buff, "¢");
+
+ // Precision with padding: 2 bytes of output, three leading pad spaces.
+ written = LIBC_NAMESPACE::sprintf(buff, "%5.4ls", L"¢€😀");
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ_LEN(written, buff, "   ¢");
+
+ // Precision with left justification: three trailing pad spaces.
+ written = LIBC_NAMESPACE::sprintf(buff, "%-5.4ls", L"¢€😀");
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ_LEN(written, buff, "¢   ");
+
+ // Short precision that would cut a multibyte character: nothing is written.
+ written = LIBC_NAMESPACE::sprintf(buff, "%.1ls", L"¢€😀");
+ EXPECT_EQ(written, 0);
+ ASSERT_STREQ_LEN(written, buff, "");
+
+ // Longer precision and shorter width.
+ written = LIBC_NAMESPACE::sprintf(buff, "%3.5ls", L"¢€😀");
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ_LEN(written, buff, "¢€");
+
+ // Reverse the wide string and test with width and precision.
+ written = LIBC_NAMESPACE::sprintf(buff, "%4.4ls", L"😀€¢");
+ EXPECT_EQ(written, 4);
+ ASSERT_STREQ_LEN(written, buff, "😀");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%5.4ls", L"😀€¢");
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ_LEN(written, buff, " 😀");
+
+ written = LIBC_NAMESPACE::sprintf(buff, "%.3ls", L"😀€¢");
+ EXPECT_EQ(written, 0);
+ ASSERT_STREQ_LEN(written, buff, "");
+
+ // NULL string test.
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", nullptr);
+ EXPECT_EQ(written, 6);
+ ASSERT_STREQ_LEN(written, buff, "(null)");
+
+ // NULL string with precision.
+ written = LIBC_NAMESPACE::sprintf(buff, "%.3ls", nullptr);
+ EXPECT_EQ(written, 3);
+ ASSERT_STREQ_LEN(written, buff, "(nu");
+
+ // NULL string with precision and width: three leading pad spaces.
+ written = LIBC_NAMESPACE::sprintf(buff, "%6.3ls", nullptr);
+ EXPECT_EQ(written, 6);
+ ASSERT_STREQ_LEN(written, buff, "   (nu");
+}
#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
More information about the libc-commits
mailing list