[libc-commits] [libc] [libc] Reland %lc support in printf (PR #176110)

Shubh Pachchigar via libc-commits libc-commits at lists.llvm.org
Tue Jan 20 10:21:18 PST 2026


https://github.com/shubhe25p updated https://github.com/llvm/llvm-project/pull/176110

>From b6426431df31e4f4d4eb0de795f6e38f9601a7ce Mon Sep 17 00:00:00 2001
From: shubhe25p <shubhp at mbm3a24.local>
Date: Tue, 20 Jan 2026 10:20:56 -0800
Subject: [PATCH] [libc] Reland %lc support in printf

Add support for %lc in printf by calling the internal wcrtomb function, and add a relevant end-to-end sprintf test.

Additionally, made the following changes:

- Modified the printf parser to recognize the length modifier on %c.
- Added two internal error codes.
- Added a flag to disable wchar support on the Windows platform.
- To keep the printf interface header-only, converted wcrtomb and CharacterConverter to header-only implementations and removed their .cpp sources.

Resolves GPU libc issues in #169983
---
 .../modules/LLVMLibCCompileOptionRules.cmake  |   4 +
 libc/config/config.json                       |   4 +
 libc/config/windows/config.json               |   8 +
 libc/docs/dev/printf_behavior.rst             |   7 +
 libc/src/__support/wchar/CMakeLists.txt       |   9 +-
 .../__support/wchar/character_converter.cpp   | 176 ------------------
 .../src/__support/wchar/character_converter.h | 166 ++++++++++++++++-
 libc/src/__support/wchar/wcrtomb.cpp          |  50 -----
 libc/src/__support/wchar/wcrtomb.h            |  39 +++-
 libc/src/stdio/printf_core/CMakeLists.txt     |  19 ++
 libc/src/stdio/printf_core/char_converter.h   |  50 ++++-
 libc/src/stdio/printf_core/core_structs.h     |   2 +
 .../stdio/printf_core/linux/error_mapper.h    |   3 +
 libc/src/stdio/printf_core/parser.h           |  32 +++-
 libc/test/src/stdio/CMakeLists.txt            |  10 +
 libc/test/src/stdio/sprintf_test.cpp          |  98 +++++++++-
 16 files changed, 425 insertions(+), 252 deletions(-)
 create mode 100644 libc/config/windows/config.json
 delete mode 100644 libc/src/__support/wchar/character_converter.cpp
 delete mode 100644 libc/src/__support/wchar/wcrtomb.cpp

diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index a719f20d532e0..638cfc122d7de 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -135,6 +135,10 @@ function(_get_compile_options_from_config output_var)
     endif()
   endif()
 
+  if (LIBC_CONF_PRINTF_DISABLE_WIDE)
+    list(APPEND config_options "-DLIBC_COPT_PRINTF_DISABLE_WIDE")
+  endif()
+
   set(${output_var} ${config_options} PARENT_SCOPE)
 endfunction(_get_compile_options_from_config)
 
diff --git a/libc/config/config.json b/libc/config/config.json
index f981c433b2c7c..296d2e539c23d 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -52,6 +52,10 @@
     "LIBC_CONF_PRINTF_RUNTIME_DISPATCH": {
       "value": true,
       "doc": "Use dynamic dispatch for the output mechanism to reduce code size."
+    },
+    "LIBC_CONF_PRINTF_DISABLE_WIDE": {
+      "value": false,
+      "doc": "Disable handling wide characters for printf and friends."
     }
   },
   "scanf": {
diff --git a/libc/config/windows/config.json b/libc/config/windows/config.json
new file mode 100644
index 0000000000000..6bb09db9d1aab
--- /dev/null
+++ b/libc/config/windows/config.json
@@ -0,0 +1,8 @@
+{
+    "printf": {
+      "LIBC_CONF_PRINTF_DISABLE_WIDE": {
+        "value": "true",
+        "doc": "Disable handling wide characters for printf and friends."
+      }
+    }
+}
diff --git a/libc/docs/dev/printf_behavior.rst b/libc/docs/dev/printf_behavior.rst
index 01ab128a1f238..ba0578aee3fd8 100644
--- a/libc/docs/dev/printf_behavior.rst
+++ b/libc/docs/dev/printf_behavior.rst
@@ -71,6 +71,13 @@ conversions (%r, %k); any fixed point number conversion will be treated as
 invalid. This reduces code size. This has no effect if the current compiler does
 not support fixed point numbers.
 
+LIBC_COPT_PRINTF_DISABLE_WIDE
+--------------------------------
+When set, this flag disables support for wide characters (%lc and %ls). With it
+set, the 'l' modifier on %lc is ignored and the argument is converted like %c.
+This reduces code size. It is set by default on Windows, as the current printf
+implementation does not support UTF-16 wide characters.
+
 LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
 ----------------------------------
 When set, this flag disables the nullptr checks in %n and %s.
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index aed1d53bdf522..304b123b4520b 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -20,12 +20,10 @@ add_header_library(
     .character_converter 
 )
 
-add_object_library(
+add_header_library(
   character_converter
   HDRS
     character_converter.h
-  SRCS 
-    character_converter.cpp
   DEPENDS
     libc.hdr.errno_macros
     libc.hdr.types.char8_t
@@ -36,12 +34,10 @@ add_object_library(
     .mbstate
 )
 
-add_object_library(
+add_header_library(
   wcrtomb
   HDRS
     wcrtomb.h
-  SRCS 
-    wcrtomb.cpp
   DEPENDS
     libc.hdr.errno_macros
     libc.hdr.types.char32_t
@@ -49,6 +45,7 @@ add_object_library(
     libc.hdr.types.wchar_t
     libc.src.__support.error_or
     libc.src.__support.common
+    libc.src.__support.macros.null_check
     .character_converter
     .mbstate
 )
diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
deleted file mode 100644
index 26672884d7b16..0000000000000
--- a/libc/src/__support/wchar/character_converter.cpp
+++ /dev/null
@@ -1,176 +0,0 @@
-//===-- Implementation of a class for conversion --------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "hdr/errno_macros.h"
-#include "hdr/types/char32_t.h"
-#include "hdr/types/char8_t.h"
-#include "hdr/types/size_t.h"
-#include "src/__support/CPP/bit.h"
-#include "src/__support/common.h"
-#include "src/__support/error_or.h"
-#include "src/__support/math_extras.h"
-#include "src/__support/wchar/mbstate.h"
-
-#include "character_converter.h"
-
-namespace LIBC_NAMESPACE_DECL {
-namespace internal {
-
-// This is for utf-8 bytes other than the first byte
-constexpr size_t ENCODED_BITS_PER_UTF8 = 6;
-// The number of bits per utf-8 byte that actually encode character
-// Information not metadata (# of bits excluding the byte headers)
-constexpr uint32_t MASK_ENCODED_BITS =
-    mask_trailing_ones<uint32_t, ENCODED_BITS_PER_UTF8>();
-// Maximum value for utf-32 for a utf-8 sequence of a given length
-constexpr char32_t MAX_VALUE_PER_UTF8_LEN[] = {0x7f, 0x7ff, 0xffff, 0x10ffff};
-constexpr int MAX_UTF8_LENGTH = 4;
-
-CharacterConverter::CharacterConverter(mbstate *mbstate) { state = mbstate; }
-
-void CharacterConverter::clear() {
-  state->partial = 0;
-  state->bytes_stored = 0;
-  state->total_bytes = 0;
-}
-
-bool CharacterConverter::isFull() {
-  return state->bytes_stored == state->total_bytes && state->total_bytes != 0;
-}
-
-bool CharacterConverter::isEmpty() { return state->bytes_stored == 0; }
-
-bool CharacterConverter::isValidState() {
-  if (state->total_bytes > MAX_UTF8_LENGTH)
-    return false;
-
-  const char32_t max_utf32_value =
-      state->total_bytes == 0 ? 0
-                              : MAX_VALUE_PER_UTF8_LEN[state->total_bytes - 1];
-  return state->bytes_stored <= state->total_bytes &&
-         state->partial <= max_utf32_value;
-}
-
-int CharacterConverter::push(char8_t utf8_byte) {
-  uint8_t num_ones = static_cast<uint8_t>(cpp::countl_one(utf8_byte));
-  // Checking the first byte if first push
-  if (isEmpty()) {
-    // UTF-8 char has 1 byte total
-    if (num_ones == 0) {
-      state->total_bytes = 1;
-    }
-    // UTF-8 char has 2 through 4 bytes total
-    else if (num_ones >= 2 && num_ones <= 4) {
-      /* Since the format is 110xxxxx, 1110xxxx, and 11110xxx for 2, 3, and 4,
-      we will make the base mask with 7 ones and right shift it as necessary. */
-      constexpr size_t SIGNIFICANT_BITS = 7;
-      char8_t base_mask =
-          static_cast<char8_t>(mask_trailing_ones<uint8_t, SIGNIFICANT_BITS>());
-      state->total_bytes = num_ones;
-      utf8_byte &= (base_mask >> num_ones);
-    }
-    // Invalid first byte
-    else {
-      // bytes_stored and total_bytes will always be 0 here
-      state->partial = static_cast<char32_t>(0);
-      return EILSEQ;
-    }
-    state->partial = static_cast<char32_t>(utf8_byte);
-    state->bytes_stored++;
-    return 0;
-  }
-  // Any subsequent push
-  // Adding 6 more bits so need to left shift
-  if (num_ones == 1 && !isFull()) {
-    char32_t byte = utf8_byte & MASK_ENCODED_BITS;
-    state->partial = state->partial << ENCODED_BITS_PER_UTF8;
-    state->partial |= byte;
-    state->bytes_stored++;
-    return 0;
-  }
-
-  // Invalid byte -> reset the state
-  clear();
-  return EILSEQ;
-}
-
-int CharacterConverter::push(char32_t utf32) {
-  // we can't be partially through a conversion when pushing a utf32 value
-  if (!isEmpty())
-    return -1;
-
-  state->partial = utf32;
-
-  // determine number of utf-8 bytes needed to represent this utf32 value
-  for (uint8_t i = 0; i < MAX_UTF8_LENGTH; i++) {
-    if (state->partial <= MAX_VALUE_PER_UTF8_LEN[i]) {
-      state->total_bytes = i + 1;
-      state->bytes_stored = i + 1;
-      return 0;
-    }
-  }
-
-  // `utf32` contains a value that is too large to actually represent a valid
-  // unicode character
-  clear();
-  return EILSEQ;
-}
-
-ErrorOr<char32_t> CharacterConverter::pop_utf32() {
-  // If pop is called too early, do not reset the state, use error to determine
-  // whether enough bytes have been pushed
-  if (!isFull())
-    return Error(-1);
-  char32_t utf32 = state->partial;
-  // reset if successful pop
-  clear();
-  return utf32;
-}
-
-ErrorOr<char8_t> CharacterConverter::pop_utf8() {
-  if (isEmpty())
-    return Error(-1);
-
-  constexpr char8_t FIRST_BYTE_HEADERS[] = {0, 0xC0, 0xE0, 0xF0};
-  constexpr char8_t CONTINUING_BYTE_HEADER = 0x80;
-
-  char32_t output;
-
-  // Shift to get the next 6 bits from the utf32 encoding
-  const size_t shift_amount = (state->bytes_stored - 1) * ENCODED_BITS_PER_UTF8;
-  if (isFull()) {
-    /*
-      Choose the correct set of most significant bits to encode the length
-      of the utf8 sequence. The remaining bits contain the most significant
-      bits of the unicode value of the character.
-    */
-    output = FIRST_BYTE_HEADERS[state->total_bytes - 1] |
-             (state->partial >> shift_amount);
-  } else {
-    // Get the next 6 bits and format it like so: 10xxxxxx
-    output = CONTINUING_BYTE_HEADER |
-             ((state->partial >> shift_amount) & MASK_ENCODED_BITS);
-  }
-
-  state->bytes_stored--;
-  if (state->bytes_stored == 0)
-    clear();
-
-  return static_cast<char8_t>(output);
-}
-
-template <> ErrorOr<char8_t> CharacterConverter::pop() { return pop_utf8(); }
-template <> ErrorOr<char32_t> CharacterConverter::pop() { return pop_utf32(); }
-
-template <> size_t CharacterConverter::sizeAs<char8_t>() {
-  return state->total_bytes;
-}
-template <> size_t CharacterConverter::sizeAs<char32_t>() { return 1; }
-
-} // namespace internal
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h
index 2cc28abf2772a..dd2fde181cc7d 100644
--- a/libc/src/__support/wchar/character_converter.h
+++ b/libc/src/__support/wchar/character_converter.h
@@ -9,12 +9,16 @@
 #ifndef LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
 #define LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
 
+#include "hdr/errno_macros.h"
 #include "hdr/types/char32_t.h"
 #include "hdr/types/char8_t.h"
 #include "hdr/types/size_t.h"
+
+#include "src/__support/CPP/bit.h"
 #include "src/__support/CPP/type_traits.h"
 #include "src/__support/common.h"
 #include "src/__support/error_or.h"
+#include "src/__support/math_extras.h"
 #include "src/__support/wchar/mbstate.h"
 
 namespace LIBC_NAMESPACE_DECL {
@@ -24,12 +28,31 @@ class CharacterConverter {
 private:
   mbstate *state;
 
+  // This is for utf-8 bytes other than the first byte
+  static constexpr size_t ENCODED_BITS_PER_UTF8 = 6;
+
+  // The number of bits per utf-8 byte that actually encode character
+  // Information not metadata (# of bits excluding the byte headers)
+  static constexpr uint32_t MASK_ENCODED_BITS =
+      mask_trailing_ones<uint32_t, ENCODED_BITS_PER_UTF8>();
+
+  // Maximum value for utf-32 for a utf-8 sequence of a given length
+  static constexpr char32_t MAX_VALUE_PER_UTF8_LEN[] = {0x7f, 0x7ff, 0xffff,
+                                                        0x10ffff};
+  static constexpr int MAX_UTF8_LENGTH = 4;
+
 public:
-  CharacterConverter(mbstate *mbstate);
+  CharacterConverter(mbstate *mbstate): state(mbstate) {}
 
-  void clear();
-  bool isFull();
-  bool isEmpty();
+  void clear() {
+    state->partial = 0;
+    state->bytes_stored = 0;
+    state->total_bytes = 0;
+  }
+  bool isFull() {
+    return state->bytes_stored == state->total_bytes && state->total_bytes != 0;
+  }
+  bool isEmpty() { return state->bytes_stored == 0; }
   bool isValidState();
 
   template <typename CharType> size_t sizeAs();
@@ -42,6 +65,141 @@ class CharacterConverter {
   template <typename CharType> ErrorOr<CharType> pop();
 };
 
+LIBC_INLINE bool CharacterConverter::isValidState() {
+  if (state->total_bytes > MAX_UTF8_LENGTH)
+    return false;
+
+  const char32_t max_utf32_value =
+      state->total_bytes == 0 ? 0
+                              : MAX_VALUE_PER_UTF8_LEN[state->total_bytes - 1];
+  return state->bytes_stored <= state->total_bytes &&
+         state->partial <= max_utf32_value;
+}
+
+LIBC_INLINE int CharacterConverter::push(char8_t utf8_byte) {
+  uint8_t num_ones = static_cast<uint8_t>(cpp::countl_one(utf8_byte));
+  // Checking the first byte if first push
+  if (isEmpty()) {
+    // UTF-8 char has 1 byte total
+    if (num_ones == 0) {
+      state->total_bytes = 1;
+    }
+    // UTF-8 char has 2 through 4 bytes total
+    else if (num_ones >= 2 && num_ones <= 4) {
+      /* Since the format is 110xxxxx, 1110xxxx, and 11110xxx for 2, 3, and 4,
+      we will make the base mask with 7 ones and right shift it as necessary. */
+      constexpr size_t SIGNIFICANT_BITS = 7;
+      char8_t base_mask =
+          static_cast<char8_t>(mask_trailing_ones<uint8_t, SIGNIFICANT_BITS>());
+      state->total_bytes = num_ones;
+      utf8_byte &= (base_mask >> num_ones);
+    }
+    // Invalid first byte
+    else {
+      // bytes_stored and total_bytes will always be 0 here
+      state->partial = static_cast<char32_t>(0);
+      return EILSEQ;
+    }
+    state->partial = static_cast<char32_t>(utf8_byte);
+    state->bytes_stored++;
+    return 0;
+  }
+  // Any subsequent push
+  // Adding 6 more bits so need to left shift
+  if (num_ones == 1 && !isFull()) {
+    char32_t byte = utf8_byte & MASK_ENCODED_BITS;
+    state->partial = state->partial << ENCODED_BITS_PER_UTF8;
+    state->partial |= byte;
+    state->bytes_stored++;
+    return 0;
+  }
+
+  // Invalid byte -> reset the state
+  clear();
+  return EILSEQ;
+}
+
+LIBC_INLINE int CharacterConverter::push(char32_t utf32) {
+  // we can't be partially through a conversion when pushing a utf32 value
+  if (!isEmpty())
+    return -1;
+
+  state->partial = utf32;
+
+  // determine number of utf-8 bytes needed to represent this utf32 value
+  for (uint8_t i = 0; i < MAX_UTF8_LENGTH; i++) {
+    if (state->partial <= MAX_VALUE_PER_UTF8_LEN[i]) {
+      state->total_bytes = i + 1;
+      state->bytes_stored = i + 1;
+      return 0;
+    }
+  }
+
+  // `utf32` contains a value that is too large to actually represent a valid
+  // unicode character
+  clear();
+  return EILSEQ;
+}
+
+LIBC_INLINE ErrorOr<char32_t> CharacterConverter::pop_utf32() {
+  // If pop is called too early, do not reset the state, use error to determine
+  // whether enough bytes have been pushed
+  if (!isFull())
+    return Error(-1);
+  char32_t utf32 = state->partial;
+  // reset if successful pop
+  clear();
+  return utf32;
+}
+
+LIBC_INLINE ErrorOr<char8_t> CharacterConverter::pop_utf8() {
+  if (isEmpty())
+    return Error(-1);
+
+  constexpr char8_t FIRST_BYTE_HEADERS[] = {0, 0xC0, 0xE0, 0xF0};
+  constexpr char8_t CONTINUING_BYTE_HEADER = 0x80;
+
+  char32_t output;
+
+  // Shift to get the next 6 bits from the utf32 encoding
+  const size_t shift_amount = (state->bytes_stored - 1) * ENCODED_BITS_PER_UTF8;
+  if (isFull()) {
+    /*
+      Choose the correct set of most significant bits to encode the length
+      of the utf8 sequence. The remaining bits contain the most significant
+      bits of the unicode value of the character.
+    */
+    output = FIRST_BYTE_HEADERS[state->total_bytes - 1] |
+             (state->partial >> shift_amount);
+  } else {
+    // Get the next 6 bits and format it like so: 10xxxxxx
+    output = CONTINUING_BYTE_HEADER |
+             ((state->partial >> shift_amount) & MASK_ENCODED_BITS);
+  }
+
+  state->bytes_stored--;
+  if (state->bytes_stored == 0)
+    clear();
+
+  return static_cast<char8_t>(output);
+}
+
+template <> LIBC_INLINE ErrorOr<char8_t> CharacterConverter::pop() {
+  return pop_utf8();
+}
+
+template <> LIBC_INLINE ErrorOr<char32_t> CharacterConverter::pop() {
+  return pop_utf32();
+}
+
+template <> LIBC_INLINE size_t CharacterConverter::sizeAs<char8_t>() {
+  return state->total_bytes;
+}
+
+template <> LIBC_INLINE size_t CharacterConverter::sizeAs<char32_t>() {
+  return 1;
+}
+
 } // namespace internal
 } // namespace LIBC_NAMESPACE_DECL
 
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
deleted file mode 100644
index fc54bbfc93e0c..0000000000000
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-//===-- Implementation of wcrtomb -----------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/__support/wchar/wcrtomb.h"
-#include "src/__support/error_or.h"
-#include "src/__support/wchar/character_converter.h"
-#include "src/__support/wchar/mbstate.h"
-
-#include "hdr/errno_macros.h"
-#include "hdr/types/char32_t.h"
-#include "hdr/types/size_t.h"
-#include "hdr/types/wchar_t.h"
-#include "src/__support/common.h"
-#include "src/__support/libc_assert.h"
-
-namespace LIBC_NAMESPACE_DECL {
-namespace internal {
-
-ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
-                        mbstate *__restrict ps) {
-  static_assert(sizeof(wchar_t) == 4);
-
-  CharacterConverter cr(ps);
-
-  if (!cr.isValidState())
-    return Error(EINVAL);
-
-  int status = cr.push(static_cast<char32_t>(wc));
-  if (status != 0)
-    return Error(status);
-
-  size_t count = 0;
-  while (!cr.isEmpty()) {
-    auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded
-    LIBC_ASSERT(utf8.has_value());
-
-    *s = utf8.value();
-    s++;
-    count++;
-  }
-  return count;
-}
-
-} // namespace internal
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/wcrtomb.h b/libc/src/__support/wchar/wcrtomb.h
index bcd39a92a3b76..98cf852799d5d 100644
--- a/libc/src/__support/wchar/wcrtomb.h
+++ b/libc/src/__support/wchar/wcrtomb.h
@@ -9,16 +9,47 @@
 #ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
 #define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
 
-#include "hdr/types/size_t.h"
-#include "hdr/types/wchar_t.h"
 #include "src/__support/error_or.h"
-#include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+#include "src/__support/wchar/character_converter.h"
 #include "src/__support/wchar/mbstate.h"
 
+#include "hdr/errno_macros.h"
+#include "hdr/types/char32_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_assert.h"
+
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
-ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc, mbstate *__restrict ps);
+LIBC_INLINE ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
+                                    mbstate *__restrict ps) {
+  LIBC_CRASH_ON_NULLPTR(s);
+  LIBC_CRASH_ON_NULLPTR(ps);
+  static_assert(sizeof(wchar_t) == 4);
+
+  CharacterConverter cr(ps);
+
+  if (!cr.isValidState())
+    return Error(EINVAL);
+
+  int status = cr.push(static_cast<char32_t>(wc));
+  if (status != 0)
+    return Error(status);
+
+  size_t count = 0;
+  while (!cr.isEmpty()) {
+    auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded
+    LIBC_ASSERT(utf8.has_value());
+
+    *s = utf8.value();
+    s++;
+    count++;
+  }
+  return count;
+}
 
 } // namespace internal
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index 624129b2b36e7..ae93cc754299b 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -43,6 +43,22 @@ if(NOT TARGET ${target_error_mapper})
     set(target_error_mapper libc.src.stdio.printf_core.generic.error_mapper)
 endif()
 
+if(LIBC_CONF_PRINTF_DISABLE_WIDE)
+  set(wchar_deps "")
+  set(parser_wchar_deps "")
+else()
+  set(wchar_deps
+    libc.hdr.types.wchar_t
+    libc.hdr.types.wint_t
+    libc.hdr.wchar_macros
+    libc.src.__support.wchar.wcrtomb
+    libc.src.__support.wchar.mbstate
+  )
+  set(parser_wchar_deps
+    libc.hdr.types.wint_t
+  )
+endif()
+
 add_header_library(
   printf_config
   HDRS
@@ -76,6 +92,7 @@ add_header_library(
     libc.src.__support.CPP.string_view
     libc.src.__support.CPP.type_traits
     libc.src.__support.common
+    ${parser_wchar_deps}
 )
 
 add_header_library(
@@ -111,6 +128,7 @@ add_header_library(
     .printf_config
     .writer
     libc.include.inttypes
+    libc.hdr.limits_macros
     libc.src.__support.big_int
     libc.src.__support.common
     libc.src.__support.CPP.limits
@@ -125,6 +143,7 @@ add_header_library(
     libc.src.__support.uint128
     libc.src.__support.StringUtil.error_to_string
     libc.src.string.memory_utils.inline_memcpy
+    ${wchar_deps}
 )
 
 add_header_library(
diff --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h
index fd2eb2553887a..e4792c3b23b9f 100644
--- a/libc/src/stdio/printf_core/char_converter.h
+++ b/libc/src/stdio/printf_core/char_converter.h
@@ -1,4 +1,4 @@
-//===-- String Converter for printf -----------------------------*- C++ -*-===//
+//===-- Character Converter for printf --------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -9,6 +9,15 @@
 #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
 
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+#include "hdr/types/wchar_t.h"
+#include "hdr/types/wint_t.h"
+#include "hdr/wchar_macros.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcrtomb.h"
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+
+#include "hdr/limits_macros.h"
 #include "src/__support/macros/config.h"
 #include "src/stdio/printf_core/converter_utils.h"
 #include "src/stdio/printf_core/core_structs.h"
@@ -20,12 +29,41 @@ namespace printf_core {
 template <WriteMode write_mode>
 LIBC_INLINE int convert_char(Writer<write_mode> *writer,
                              const FormatSection &to_conv) {
-  char c = static_cast<char>(to_conv.conv_val_raw);
 
-  constexpr int STRING_LEN = 1;
+  char buffer[MB_LEN_MAX];
+  size_t write_size = 0;
+
+  if (to_conv.length_modifier == LengthModifier::l) {
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+    wint_t wi = static_cast<wint_t>(to_conv.conv_val_raw);
+
+    if (wi == WEOF) {
+      return ILLEGAL_WIDE_CHAR;
+    }
+
+    internal::mbstate mbstate;
+    wchar_t wc = static_cast<wchar_t>(wi);
+    auto ret = internal::wcrtomb(buffer, wc, &mbstate);
+
+    if (!ret.has_value()) {
+      return MB_CONVERSION_ERROR;
+    }
+
+    write_size = ret.value();
+#else
+    // If wide characters are disabled, treat the 'l' modifier as a no-op.
+    buffer[0] = static_cast<char>(to_conv.conv_val_raw);
+    write_size = 1;
+
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+  } else {
+    buffer[0] = static_cast<char>(to_conv.conv_val_raw);
+    write_size = 1;
+  }
 
-  size_t padding_spaces =
-      to_conv.min_width > STRING_LEN ? to_conv.min_width - STRING_LEN : 0;
+  size_t padding_spaces = to_conv.min_width > static_cast<int>(write_size)
+                              ? to_conv.min_width - static_cast<int>(write_size)
+                              : 0;
 
   // If the padding is on the left side, write the spaces first.
   if (padding_spaces > 0 &&
@@ -33,7 +71,7 @@ LIBC_INLINE int convert_char(Writer<write_mode> *writer,
     RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces));
   }
 
-  RET_IF_RESULT_NEGATIVE(writer->write(c));
+  RET_IF_RESULT_NEGATIVE(writer->write({buffer, write_size}));
 
   // If the padding is on the right side, write the spaces last.
   if (padding_spaces > 0 &&
diff --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h
index 0d41f2244d8da..d93fa962db90e 100644
--- a/libc/src/stdio/printf_core/core_structs.h
+++ b/libc/src/stdio/printf_core/core_structs.h
@@ -142,6 +142,8 @@ constexpr int INT_CONVERSION_ERROR = -1004;
 constexpr int FIXED_POINT_CONVERSION_ERROR = -1005;
 constexpr int ALLOCATION_ERROR = -1006;
 constexpr int OVERFLOW_ERROR = -1007;
+constexpr int ILLEGAL_WIDE_CHAR = -1008;
+constexpr int MB_CONVERSION_ERROR = -1009;
 
 } // namespace printf_core
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/printf_core/linux/error_mapper.h b/libc/src/stdio/printf_core/linux/error_mapper.h
index 3c2fe663072d0..3449f12593469 100644
--- a/libc/src/stdio/printf_core/linux/error_mapper.h
+++ b/libc/src/stdio/printf_core/linux/error_mapper.h
@@ -40,6 +40,9 @@ LIBC_INLINE static int internal_error_to_errno(int internal_error) {
     return ENOMEM;
   case OVERFLOW_ERROR:
     return EOVERFLOW;
+  case ILLEGAL_WIDE_CHAR:
+  case MB_CONVERSION_ERROR:
+    return EILSEQ;
   default:
     LIBC_ASSERT(
         false &&
diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h
index cef9b1ae58fa0..a3b62991bcec9 100644
--- a/libc/src/stdio/printf_core/parser.h
+++ b/libc/src/stdio/printf_core/parser.h
@@ -27,6 +27,9 @@
 #ifndef LIBC_COPT_PRINTF_DISABLE_STRERROR
 #include "src/__support/libc_errno.h"
 #endif // LIBC_COPT_PRINTF_DISABLE_STRERROR
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+#include "hdr/types/wint_t.h"
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
 
 namespace LIBC_NAMESPACE_DECL {
 namespace printf_core {
@@ -73,9 +76,9 @@ template <typename ArgProvider> class Parser {
   ArgProvider args_cur;
 
 #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
-  // args_start stores the start of the va_args, which is allows getting the
-  // value of arguments that have already been passed. args_index is tracked so
-  // that we know which argument args_cur is on.
+  // args_start stores the start of the va_args, which allows getting the
+  // value of arguments that have already been passed. args_index is tracked so
+  // that we know which argument args_cur is on.
   ArgProvider args_start;
   size_t args_index = 1;
 
@@ -173,7 +176,17 @@ template <typename ArgProvider> class Parser {
         section.has_conv = true;
         break;
       case ('c'):
-        WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
+        if (section.length_modifier == LengthModifier::l) {
+#ifdef LIBC_COPT_PRINTF_DISABLE_WIDE
+          using WideCharArgType = int;
+#else
+          using WideCharArgType = wint_t;
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+          WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, WideCharArgType,
+                                 conv_index);
+        } else {
+          WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
+        }
         break;
       case ('d'):
       case ('i'):
@@ -574,7 +587,16 @@ template <typename ArgProvider> class Parser {
           conv_size = type_desc_from_type<void>();
           break;
         case ('c'):
-          conv_size = type_desc_from_type<int>();
+          if (lm == LengthModifier::l) {
+#ifdef LIBC_COPT_PRINTF_DISABLE_WIDE
+            using WideCharArgType = int;
+#else
+            using WideCharArgType = wint_t;
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+            conv_size = type_desc_from_type<WideCharArgType>();
+          } else {
+            conv_size = type_desc_from_type<int>();
+          }
           break;
         case ('d'):
         case ('i'):
diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt
index a39428fb8d16c..fde2023caaac1 100644
--- a/libc/test/src/stdio/CMakeLists.txt
+++ b/libc/test/src/stdio/CMakeLists.txt
@@ -137,6 +137,15 @@ if(LIBC_CONF_PRINTF_DISABLE_STRERROR)
   list(APPEND sprintf_test_copts "-DLIBC_COPT_PRINTF_DISABLE_STRERROR")
 endif()
 
+if(LIBC_CONF_PRINTF_DISABLE_WIDE)
+  set(wchar_deps "")
+else()
+  set(wchar_deps
+    libc.hdr.types.wint_t
+    libc.hdr.wchar_macros
+  )
+endif()
+
 add_fp_unittest(
   sprintf_test
   UNIT_TEST_ONLY
@@ -148,6 +157,7 @@ add_fp_unittest(
     libc.src.stdio.sprintf
     libc.src.__support.FPUtil.fp_bits
     libc.include.inttypes
+    ${wchar_deps}
   COMPILE_OPTIONS
     ${sprintf_test_copts}
 )
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 689a38a49f13c..78186abb9966e 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -9,8 +9,13 @@
 #include "src/__support/macros/config.h"
 #include "src/stdio/sprintf.h"
 
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+#include "hdr/types/wint_t.h"
+#include "hdr/wchar_macros.h"
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE
+
 #include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/libc_errno.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
 #include "test/UnitTest/RoundingModeUtils.h"
 #include "test/UnitTest/Test.h"
 #include <inttypes.h>
@@ -3487,3 +3492,94 @@ TEST(LlvmLibcSPrintfTest, IndexModeParsing) {
                    "why would u do this, this is such   a pain. %");
 }
 #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
+
+#ifndef LIBC_COPT_PRINTF_DISABLE_WIDE
+TEST(LlvmLibcSprintfTest, WideCharConversion) {
+  char buff[16];
+  int written;
+
+  // 1 byte UTF-8 character.
+  written = LIBC_NAMESPACE::sprintf(buff, "%lc", L'A');
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ_LEN(written, buff, "A");
+
+  // 1 byte UTF-8 character left justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%-4lc", L'A');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "A   ");
+
+  // 1 byte UTF-8 character right justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%4lc", L'A');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "   A");
+
+  // 2 byte UTF-8 character.
+  written = LIBC_NAMESPACE::sprintf(buff, "%lc", L'¢');
+  EXPECT_EQ(written, 2);
+  ASSERT_STREQ_LEN(written, buff, "¢");
+
+  // 2 byte UTF-8 character left justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%-4lc", L'¢');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "¢  ");
+
+  // 2 byte UTF-8 character right justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%4lc", L'¢');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "  ¢");
+
+  // Euro sign is a 3-byte UTF-8 character.
+  written = LIBC_NAMESPACE::sprintf(buff, "%lc", L'€');
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ_LEN(written, buff, "€");
+
+  // Euro sign left justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%-4lc", L'€');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "€ ");
+
+  // Euro sign right justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%4lc", L'€');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, " €");
+
+  // Euro sign right justified with the '+' flag, which has no effect on %lc.
+  written = LIBC_NAMESPACE::sprintf(buff, "%+4lc", L'€');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, " €");
+
+  // Grinning face emoji is a 4-byte UTF-8 character.
+  written = LIBC_NAMESPACE::sprintf(buff, "%lc", L'😀');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "😀");
+
+  // Grinning face emoji left justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%-4lc", L'😀');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "😀");
+
+  // Grinning face emoji right justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%4lc", L'😀');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "😀");
+
+  // Grinning face emoji with smaller width, left justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%-3lc", L'😀');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "😀");
+
+  // Grinning face emoji with smaller width, right justified.
+  written = LIBC_NAMESPACE::sprintf(buff, "%3lc", L'😀');
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ_LEN(written, buff, "😀");
+
+  // WEOF test.
+  EXPECT_EQ(LIBC_NAMESPACE::sprintf(buff, "%lc", WEOF), -1);
+  ASSERT_ERRNO_EQ(EILSEQ);
+
+  // Invalid wide character test
+  EXPECT_EQ(LIBC_NAMESPACE::sprintf(buff, "%lc", static_cast<wint_t>(0x12ffff)),
+            -1);
+  ASSERT_ERRNO_EQ(EILSEQ);
+}
+#endif // LIBC_COPT_PRINTF_DISABLE_WIDE



More information about the libc-commits mailing list