[libc-commits] [libc] [libc] Support %ls in printf (PR #178841)
Shubh Pachchigar via libc-commits
libc-commits at lists.llvm.org
Mon Feb 2 23:36:05 PST 2026
https://github.com/shubhe25p updated https://github.com/llvm/llvm-project/pull/178841
>From 9a56247b9c1132ea0b804daa5fd04f05a8a92ec6 Mon Sep 17 00:00:00 2001
From: shubhe25p <shubhp at mbm3a24.local>
Date: Mon, 2 Feb 2026 23:35:32 -0800
Subject: [PATCH] [libc] Support %ls in printf
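Add support for %ls in printf by converting each wide character with
the internal wcrtomb function, and add a relevant end-to-end sprintf
test. Additionally, modify the printf parser to read the %s argument
as a void * so that the string converter can interpret it according
to the length modifier. This also disables wide string support on the
Windows platform.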
---
libc/src/stdio/printf_core/CMakeLists.txt | 2 +
libc/src/stdio/printf_core/parser.h | 2 +-
libc/src/stdio/printf_core/string_converter.h | 110 ++++++++++++++++--
libc/test/src/stdio/sprintf_test.cpp | 90 ++++++++++++++
4 files changed, 194 insertions(+), 10 deletions(-)
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index ae93cc754299b..9d43fb2a2088a 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -129,8 +129,10 @@ add_header_library(
.writer
libc.include.inttypes
libc.hdr.limits_macros
+ libc.src.string.string_length
libc.src.__support.big_int
libc.src.__support.common
+ libc.src.__support.CPP.algorithm
libc.src.__support.CPP.limits
libc.src.__support.CPP.span
libc.src.__support.CPP.string_view
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index a3b62991bcec9..7762aa1d87fce 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -290,7 +290,7 @@ template <typename ArgProvider> class Parser {
WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index);
break;
case ('s'):
- WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, char *, conv_index);
+ WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index);
break;
default:
// if the conversion is undefined, change this to a raw section.
diff --git a/libc/src/stdio/printf_core/string_converter.h b/libc/src/stdio/printf_core/string_converter.h
index 74c9f598210f7..2f1dfdfc313a2 100644
--- a/libc/src/stdio/printf_core/string_converter.h
+++ b/libc/src/stdio/printf_core/string_converter.h
@@ -9,11 +9,20 @@
#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
-#include "src/__support/CPP/string_view.h"
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+#include "hdr/types/wchar_t.h"
+#include "hdr/types/wint_t.h"
+#include "hdr/wchar_macros.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcrtomb.h"
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+
+#include "src/__support/CPP/algorithm.h" // cpp::min
#include "src/__support/macros/config.h"
#include "src/stdio/printf_core/converter_utils.h"
#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/writer.h"
+#include "src/string/string_length.h"
#include <stddef.h>
@@ -24,7 +33,41 @@ template <WriteMode write_mode>
LIBC_INLINE int convert_string(Writer<write_mode> *writer,
const FormatSection &to_conv) {
size_t string_len = 0;
- const char *str_ptr = reinterpret_cast<const char *>(to_conv.conv_val_ptr);
+
+ if (to_conv.length_modifier == LengthModifier::l) {
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+ const wchar_t *wstr_ptr =
+ reinterpret_cast<const wchar_t *>(to_conv.conv_val_ptr);
+
+#ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+ if (wstr_ptr == nullptr) {
+ wstr_ptr = L"(null)";
+ }
+#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+
+ char buffer[MB_LEN_MAX];
+ internal::mbstate mbstate;
+ size_t written = 0;
+ for (const wchar_t *cur_str = (wstr_ptr); cur_str[string_len];
+ ++string_len) {
+ wchar_t wc = cur_str[string_len];
+
+ auto ret = internal::wcrtomb(buffer, wc, &mbstate);
+ if (!ret.has_value()) {
+ return MB_CONVERSION_ERROR;
+ }
+ written += ret.value();
+
+ if (to_conv.precision >= 0 &&
+ static_cast<size_t>(to_conv.precision) < written) {
+ written -= ret.value();
+ break;
+ }
+ }
+ string_len = written;
+#else
+
+ const char *str_ptr = reinterpret_cast<const char *>(to_conv.conv_val_ptr);
#ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
if (str_ptr == nullptr) {
@@ -32,13 +75,21 @@ LIBC_INLINE int convert_string(Writer<write_mode> *writer,
}
#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
- for (const char *cur_str = (str_ptr); cur_str[string_len]; ++string_len) {
- ;
- }
+ string_len = cpp::min(internal::string_length(str_ptr),
+ static_cast<size_t>(to_conv.precision));
- if (to_conv.precision >= 0 &&
- static_cast<size_t>(to_conv.precision) < string_len)
- string_len = to_conv.precision;
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+ } else {
+
+ const char *str_ptr = reinterpret_cast<const char *>(to_conv.conv_val_ptr);
+#ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+ if (str_ptr == nullptr) {
+ str_ptr = "(null)";
+ }
+#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+ string_len = cpp::min(internal::string_length(str_ptr),
+ static_cast<size_t>(to_conv.precision));
+ }
size_t padding_spaces = to_conv.min_width > static_cast<int>(string_len)
? to_conv.min_width - string_len
@@ -50,7 +101,48 @@ LIBC_INLINE int convert_string(Writer<write_mode> *writer,
RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces));
}
- RET_IF_RESULT_NEGATIVE(writer->write({(str_ptr), string_len}));
+ if (to_conv.length_modifier == LengthModifier::l) {
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+ const wchar_t *wstr_ptr =
+ reinterpret_cast<const wchar_t *>(to_conv.conv_val_ptr);
+
+#ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+ if (wstr_ptr == nullptr) {
+ wstr_ptr = L"(null)";
+ }
+#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+
+ size_t written = 0;
+ char buffer[MB_LEN_MAX];
+ internal::mbstate mbstate;
+ for (size_t i = 0; written < string_len; ++i) {
+ // We don't need to check errors/precision here; Pass 1 guaranteed safety.
+ auto ret = internal::wcrtomb(buffer, wstr_ptr[i], &mbstate);
+ size_t mb_len = ret.value();
+
+ RET_IF_RESULT_NEGATIVE(writer->write({buffer, mb_len}));
+ written += mb_len;
+ }
+#else
+ const char *str_ptr = reinterpret_cast<const char *>(to_conv.conv_val_ptr);
+
+#ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+ if (str_ptr == nullptr) {
+ str_ptr = "(null)";
+ }
+#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+ RET_IF_RESULT_NEGATIVE(writer->write({(str_ptr), string_len}));
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+ } else {
+ const char *str_ptr = reinterpret_cast<const char *>(to_conv.conv_val_ptr);
+
+#ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+ if (str_ptr == nullptr) {
+ str_ptr = "(null)";
+ }
+#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
+ RET_IF_RESULT_NEGATIVE(writer->write({(str_ptr), string_len}));
+ }
// If the padding is on the right side, write the spaces last.
if (padding_spaces > 0 &&
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 78186abb9966e..b7ddf2fda3a4f 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -3582,4 +3582,94 @@ TEST(LlvmLibcSprintfTest, WideCharConversion) {
-1);
ASSERT_ERRNO_EQ(EILSEQ);
}
+
+TEST(LlvmLibcSprintfTest, WideCharStringConversion) {
+ char buff[64];
+ int written;
+
+ // Basic test.
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"Hello, World!");
+ EXPECT_EQ(written, 13);
+ ASSERT_STREQ_LEN(written, buff, "Hello, World!");
+
+ // Left justified.
+ written = LIBC_NAMESPACE::sprintf(buff, "%-10ls", L"Hello, World!");
+ EXPECT_EQ(written, 13);
+ ASSERT_STREQ_LEN(written, buff, "Hello, World!");
+
+ // Precision test.
+ written = LIBC_NAMESPACE::sprintf(buff, "%.5ls", L"Hello, World!");
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ_LEN(written, buff, "Hello");
+
+ // Test ¢ which is 2 bytes multibyte character.
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"¢");
+ EXPECT_EQ(written, 2);
+ ASSERT_STREQ_LEN(written, buff, "¢");
+
+ // Test € which is 3 bytes multibyte character.
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"€");
+ EXPECT_EQ(written, 3);
+ ASSERT_STREQ_LEN(written, buff, "€");
+
+ // Test 😀 which is 4 bytes multibyte character
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"😀");
+ EXPECT_EQ(written, 4);
+ ASSERT_STREQ_LEN(written, buff, "😀");
+
+ // Test string with multiple multibyte characters.
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", L"¢€😀");
+ EXPECT_EQ(written, 9);
+ ASSERT_STREQ_LEN(written, buff, "¢€😀");
+
+ // Width with multibyte characters.
+ written = LIBC_NAMESPACE::sprintf(buff, "%2ls", L"¢€😀");
+ EXPECT_EQ(written, 9);
+ ASSERT_STREQ_LEN(written, buff, "¢€😀");
+
+ // Width with multibyte characters and padding.
+ written = LIBC_NAMESPACE::sprintf(buff, "%12ls", L"¢€😀");
+ EXPECT_EQ(written, 12);
+ ASSERT_STREQ_LEN(written, buff, " ¢€😀");
+
+ // Precision with multibyte characters.
+ written = LIBC_NAMESPACE::sprintf(buff, "%.4ls", L"¢€😀");
+ EXPECT_EQ(written, 2); // Only ¢ fits in 4 bytes.
+ ASSERT_STREQ_LEN(written, buff, "¢");
+
+ // Precision with padding and multibyte characters.
+ written = LIBC_NAMESPACE::sprintf(buff, "%5.4ls", L"¢€😀");
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ_LEN(written, buff, " ¢");
+
+ // Precision with left justification and multibyte characters.
+ written = LIBC_NAMESPACE::sprintf(buff, "%-5.4ls", L"¢€😀");
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ_LEN(written, buff, "¢ ");
+
+ // Short precision that cuts a multibyte character.
+ written = LIBC_NAMESPACE::sprintf(buff, "%.1ls", L"¢€😀");
+ EXPECT_EQ(written, 0);
+ ASSERT_STREQ_LEN(written, buff, "");
+
+ // Longer precision and shorter width.
+ written = LIBC_NAMESPACE::sprintf(buff, "%3.5ls", L"¢€😀");
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ_LEN(written, buff, "¢€");
+
+ // NULL string test.
+ written = LIBC_NAMESPACE::sprintf(buff, "%ls", nullptr);
+ EXPECT_EQ(written, 6);
+ ASSERT_STREQ_LEN(written, buff, "(null)");
+
+ // NULL string with precision.
+ written = LIBC_NAMESPACE::sprintf(buff, "%.3ls", nullptr);
+ EXPECT_EQ(written, 3);
+ ASSERT_STREQ_LEN(written, buff, "(nu");
+
+ // NULL string with precision and width.
+ written = LIBC_NAMESPACE::sprintf(buff, "%6.3ls", nullptr);
+ EXPECT_EQ(written, 6);
+ ASSERT_STREQ_LEN(written, buff, " (nu");
+}
#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
More information about the libc-commits
mailing list