[libc-commits] [libc] [libc] Support %lc in printf (PR #169983)
Shubh Pachchigar via libc-commits
libc-commits at lists.llvm.org
Wed Dec 3 00:45:25 PST 2025
https://github.com/shubhe25p updated https://github.com/llvm/llvm-project/pull/169983
>From 7a33e23a7b5a94500ed62dc7cc9487c873ac4bf6 Mon Sep 17 00:00:00 2001
From: "shubh at DOE" <shubhp at mbm3a24.local>
Date: Wed, 3 Dec 2025 00:44:15 -0800
Subject: [PATCH] init
---
libc/src/stdio/printf_core/CMakeLists.txt | 5 ++
libc/src/stdio/printf_core/char_converter.h | 29 ++++++-
libc/src/stdio/printf_core/parser.h | 13 ++-
.../test/src/stdio/printf_core/CMakeLists.txt | 2 +
.../src/stdio/printf_core/converter_test.cpp | 82 ++++++++++++++++++-
.../src/stdio/printf_core/parser_test.cpp | 19 +++++
6 files changed, 142 insertions(+), 8 deletions(-)
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index 624129b2b36e7..798e8706b5331 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -67,6 +67,7 @@ add_header_library(
parser.h
DEPENDS
.core_structs
+ libc.hdr.types.wint_t
libc.src.__support.arg_list
libc.src.__support.ctype_utils
libc.src.__support.str_to_integer
@@ -111,6 +112,8 @@ add_header_library(
.printf_config
.writer
libc.include.inttypes
+ libc.hdr.types.wchar_t
+ libc.hdr.types.wint_t
libc.src.__support.big_int
libc.src.__support.common
libc.src.__support.CPP.limits
@@ -123,6 +126,8 @@ add_header_library(
libc.src.__support.integer_to_string
libc.src.__support.libc_assert
libc.src.__support.uint128
+ libc.src.__support.wchar.mbstate
+ libc.src.__support.wchar.wcrtomb
libc.src.__support.StringUtil.error_to_string
libc.src.string.memory_utils.inline_memcpy
)
diff --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h
index fd2eb2553887a..43d3ebb70e708 100644
--- a/libc/src/stdio/printf_core/char_converter.h
+++ b/libc/src/stdio/printf_core/char_converter.h
@@ -1,4 +1,4 @@
-//===-- String Converter for printf -----------------------------*- C++ -*-===//
+//===-- Character Converter for printf --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -9,7 +9,11 @@
#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
+#include "hdr/types/wchar_t.h"
+#include "hdr/types/wint_t.h"
#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcrtomb.h"
#include "src/stdio/printf_core/converter_utils.h"
#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/writer.h"
@@ -21,7 +25,6 @@ template <WriteMode write_mode>
LIBC_INLINE int convert_char(Writer<write_mode> *writer,
const FormatSection &to_conv) {
char c = static_cast<char>(to_conv.conv_val_raw);
-
constexpr int STRING_LEN = 1;
size_t padding_spaces =
@@ -33,7 +36,27 @@ LIBC_INLINE int convert_char(Writer<write_mode> *writer,
RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces));
}
- RET_IF_RESULT_NEGATIVE(writer->write(c));
+ if (to_conv.length_modifier == LengthModifier::l) {
+ wint_t wi = static_cast<wint_t>(to_conv.conv_val_raw);
+ // %lc path: WEOF is rejected up front and reported as a failure.
+ if (wi == WEOF) {
+ return -1;
+ }
+ // Scratch buffer for the multibyte form plus a local conversion state.
+ char mb_str[MB_LEN_MAX];
+ internal::mbstate mbstate;
+ wchar_t wc = static_cast<wchar_t>(wi);
+ // The state passed here must be the local object declared above.
+ auto ret = internal::wcrtomb(mb_str, wc, &mbstate);
+ if (!ret.has_value()) {
+ return -1;
+ }
+ // ret.value() is used as the byte length of the sequence held in mb_str.
+ RET_IF_RESULT_NEGATIVE(writer->write({mb_str, ret.value()}));
+
+ } else {
+ RET_IF_RESULT_NEGATIVE(writer->write(c));
+ }
// If the padding is on the right side, write the spaces last.
if (padding_spaces > 0 &&
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index cef9b1ae58fa0..dea1ff966b6bb 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H
+#include "hdr/types/wint_t.h"
#include "include/llvm-libc-macros/stdfix-macros.h"
#include "src/__support/CPP/algorithm.h" // max
#include "src/__support/CPP/limits.h"
@@ -73,9 +74,9 @@ template <typename ArgProvider> class Parser {
ArgProvider args_cur;
#ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
- // args_start stores the start of the va_args, which is allows getting the
- // value of arguments that have already been passed. args_index is tracked so
- // that we know which argument args_cur is on.
+ // args_start stores the start of the va_args, which allows getting the
+ // value of arguments that have already been passed. args_index is tracked
+ // so that we know which argument args_cur is on.
ArgProvider args_start;
size_t args_index = 1;
@@ -173,7 +174,11 @@ template <typename ArgProvider> class Parser {
section.has_conv = true;
break;
case ('c'):
- WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
+ if (section.length_modifier == LengthModifier::l) {
+ WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, wint_t, conv_index);
+ } else {
+ WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
+ }
break;
case ('d'):
case ('i'):
diff --git a/libc/test/src/stdio/printf_core/CMakeLists.txt b/libc/test/src/stdio/printf_core/CMakeLists.txt
index ff7ebbc4f5fd0..f65d2baef41c7 100644
--- a/libc/test/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/test/src/stdio/printf_core/CMakeLists.txt
@@ -7,6 +7,7 @@ add_libc_unittest(
LINK_LIBRARIES
LibcPrintfHelpers
DEPENDS
+ libc.hdr.types.wchar_t
libc.src.stdio.printf_core.parser
libc.src.stdio.printf_core.core_structs
libc.src.__support.CPP.string_view
@@ -32,6 +33,7 @@ add_libc_unittest(
SRCS
converter_test.cpp
DEPENDS
+ libc.hdr.types.wchar_t
libc.src.stdio.printf_core.converter
libc.src.stdio.printf_core.writer
libc.src.stdio.printf_core.core_structs
diff --git a/libc/test/src/stdio/printf_core/converter_test.cpp b/libc/test/src/stdio/printf_core/converter_test.cpp
index 2dae2a22c864c..296df9d9e6fa5 100644
--- a/libc/test/src/stdio/printf_core/converter_test.cpp
+++ b/libc/test/src/stdio/printf_core/converter_test.cpp
@@ -6,10 +6,10 @@
//
//===----------------------------------------------------------------------===//
+#include "hdr/types/wchar_t.h"
#include "src/stdio/printf_core/converter.h"
#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/writer.h"
-
#include "test/UnitTest/Test.h"
class LlvmLibcPrintfConverterTest : public LIBC_NAMESPACE::testing::Test {
@@ -255,3 +255,83 @@ TEST_F(LlvmLibcPrintfConverterTest, OctConversion) {
ASSERT_STREQ(str, "1234");
ASSERT_EQ(writer.get_chars_written(), size_t{4});
}
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversion) {
+ LIBC_NAMESPACE::printf_core::FormatSection section;
+ section.has_conv = true;
+ section.raw_string = "%lc";
+ section.conv_name = 'c';
+ section.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::l;
+ section.conv_val_raw = static_cast<wchar_t>(L'€');
+ // Convert U+20AC (EURO SIGN) with no width and no flags.
+ LIBC_NAMESPACE::printf_core::convert(&writer, section);
+ // Terminate the buffer so it can be compared as a C string.
+ wb.buff[wb.buff_cur] = '\0';
+ // The euro sign is written as its 3-byte UTF-8 sequence, so 3 chars total.
+ ASSERT_STREQ(str, "€");
+ ASSERT_EQ(writer.get_chars_written(), size_t{3});
+}
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionLeftJustified) {
+ LIBC_NAMESPACE::printf_core::FormatSection left_justified_conv;
+ left_justified_conv.has_conv = true;
+ left_justified_conv.raw_string = "%-4lc";
+ left_justified_conv.conv_name = 'c';
+ left_justified_conv.length_modifier =
+ LIBC_NAMESPACE::printf_core::LengthModifier::l;
+ left_justified_conv.flags =
+ LIBC_NAMESPACE::printf_core::FormatFlags::LEFT_JUSTIFIED;
+ left_justified_conv.min_width = 4;
+ left_justified_conv.conv_val_raw = static_cast<wchar_t>(L'€');
+ // Run the conversion, then NUL-terminate the buffer for ASSERT_STREQ.
+ LIBC_NAMESPACE::printf_core::convert(&writer, left_justified_conv);
+ wb.buff[wb.buff_cur] = '\0';
+ // NOTE(review): U+20AC is 3 bytes in UTF-8; confirm the expected total of 4.
+ ASSERT_STREQ(str, "€ ");
+ ASSERT_EQ(writer.get_chars_written(), size_t{4});
+}
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionRightJustified) {
+ LIBC_NAMESPACE::printf_core::FormatSection right_justified_conv;
+ right_justified_conv.has_conv = true;
+ right_justified_conv.raw_string = "%4lc";
+ right_justified_conv.conv_name = 'c';
+ right_justified_conv.length_modifier =
+ LIBC_NAMESPACE::printf_core::LengthModifier::l;
+ right_justified_conv.min_width = 4;
+ right_justified_conv.conv_val_raw = static_cast<wchar_t>(L'€');
+ // No flags are set, so the minimum width of 4 is the only modifier in play.
+ LIBC_NAMESPACE::printf_core::convert(&writer, right_justified_conv);
+ wb.buff[wb.buff_cur] = '\0';
+ // NOTE(review): U+20AC is 3 bytes in UTF-8; confirm the expected total of 4.
+ ASSERT_STREQ(str, " €");
+ ASSERT_EQ(writer.get_chars_written(), size_t{4});
+}
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionInvalid) {
+ LIBC_NAMESPACE::printf_core::FormatSection section;
+ section.has_conv = true;
+ section.raw_string = "%lc";
+ section.conv_name = 'c';
+ section.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::l;
+ // Above U+10FFFF and distinct from WEOF (assumes wchar_t has >= 21 bits).
+ section.conv_val_raw = static_cast<wchar_t>(0x12FFFF);
+ // Run the conversion and capture its status.
+ int ret = LIBC_NAMESPACE::printf_core::convert(&writer, section);
+ // convert_char returns -1 when the wide character cannot be encoded.
+ ASSERT_EQ(ret, -1);
+}
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharWEOFConversion) {
+ LIBC_NAMESPACE::printf_core::FormatSection section;
+ section.has_conv = true;
+ section.raw_string = "%lc";
+ section.conv_name = 'c';
+ section.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::l;
+ // WEOF value.
+ section.conv_val_raw = static_cast<wchar_t>(WEOF);
+ // Run the conversion and capture its return value.
+ int ret = LIBC_NAMESPACE::printf_core::convert(&writer, section);
+ // convert_char returns -1 when the %lc argument equals WEOF.
+ ASSERT_EQ(ret, -1);
+}
diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp
index 9d192828860f7..b2edf2b61d992 100644
--- a/libc/test/src/stdio/printf_core/parser_test.cpp
+++ b/libc/test/src/stdio/printf_core/parser_test.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "hdr/types/wchar_t.h"
#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/string_view.h"
#include "src/__support/arg_list.h"
@@ -370,6 +371,24 @@ TEST(LlvmLibcPrintfParserTest,
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}
+TEST(LlvmLibcPrintfParserTest, EvalOneArgWithWideCharacter) {
+ LIBC_NAMESPACE::printf_core::FormatSection format_arr[2];
+ const char *str = "%lc";
+ wchar_t arg1 = L'€';
+ evaluate(format_arr, str, arg1);
+ // Build the section the parser is expected to produce for "%lc".
+ LIBC_NAMESPACE::printf_core::FormatSection expected;
+ expected.has_conv = true;
+ // raw_string spans the 3 characters of the format string.
+ expected.raw_string = {str, 3};
+ expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::l;
+ expected.conv_val_raw =
+ static_cast<LIBC_NAMESPACE::fputil::FPBits<double>::StorageType>(arg1);
+ expected.conv_name = 'c';
+ // Only the first parsed section is compared against the expectation.
+ ASSERT_PFORMAT_EQ(expected, format_arr[0]);
+}
+
#ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
TEST(LlvmLibcPrintfParserTest, IndexModeOneArg) {
More information about the libc-commits
mailing list