[libc-commits] [libc] Implemented wcrtomb internal function and public libc function (PR #144596)

Uzair Nawaz via libc-commits libc-commits at lists.llvm.org
Fri Jun 20 14:35:58 PDT 2025


https://github.com/uzairnawaz updated https://github.com/llvm/llvm-project/pull/144596

>From bc5e83f4f84479babd932cc0b2632d028eb47508 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 17 Jun 2025 18:55:20 +0000
Subject: [PATCH 01/11] Implemented wcrtomb internal and public function

---
 libc/config/linux/x86_64/entrypoints.txt      |  1 +
 libc/hdr/types/CMakeLists.txt                 |  8 ++
 libc/hdr/types/mbstate_t.h                    | 14 +++
 libc/include/llvm-libc-types/mbstate_t.h      |  6 +-
 libc/include/wchar.yaml                       |  8 ++
 libc/src/__support/wchar/CMakeLists.txt       | 16 ++++
 .../__support/wchar/character_converter.cpp   |  3 +-
 libc/src/__support/wchar/wcrtomb.cpp          | 48 ++++++++++
 libc/src/__support/wchar/wcrtomb.h            | 27 ++++++
 libc/src/wchar/CMakeLists.txt                 | 13 +++
 libc/src/wchar/wcrtomb.cpp                    | 34 +++++++
 libc/src/wchar/wcrtomb.h                      | 23 +++++
 libc/test/src/wchar/CMakeLists.txt            | 14 +++
 libc/test/src/wchar/wcrtomb_test.cpp          | 93 +++++++++++++++++++
 14 files changed, 306 insertions(+), 2 deletions(-)
 create mode 100644 libc/hdr/types/mbstate_t.h
 create mode 100644 libc/src/__support/wchar/wcrtomb.cpp
 create mode 100644 libc/src/__support/wchar/wcrtomb.h
 create mode 100644 libc/src/wchar/wcrtomb.cpp
 create mode 100644 libc/src/wchar/wcrtomb.h
 create mode 100644 libc/test/src/wchar/wcrtomb_test.cpp

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index aa2079faed409..0373c23db04e5 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -365,6 +365,7 @@ set(TARGET_LIBC_ENTRYPOINTS
 
     # wchar.h entrypoints
     libc.src.wchar.btowc
+    libc.src.wchar.wcrtomb
     libc.src.wchar.wcslen
     libc.src.wchar.wctob
     libc.src.wchar.wmemmove
diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt
index c88c357009072..58f500a6343f6 100644
--- a/libc/hdr/types/CMakeLists.txt
+++ b/libc/hdr/types/CMakeLists.txt
@@ -20,6 +20,14 @@ add_proxy_header_library(
     libc.include.uchar
 )
 
+add_proxy_header_library(
+  mbstate_t 
+  HDRS
+    mbstate_t.h
+  DEPENDS
+    libc.include.llvm-libc-types.mbstate_t
+)
+
 add_proxy_header_library(
   div_t
   HDRS
diff --git a/libc/hdr/types/mbstate_t.h b/libc/hdr/types/mbstate_t.h
new file mode 100644
index 0000000000000..1e232af08e4db
--- /dev/null
+++ b/libc/hdr/types/mbstate_t.h
@@ -0,0 +1,14 @@
+//===-- Proxy header for mbstate_t ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
+#define LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
+
+#include "include/llvm-libc-types/mbstate_t.h"
+
+#endif // LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
diff --git a/libc/include/llvm-libc-types/mbstate_t.h b/libc/include/llvm-libc-types/mbstate_t.h
index 540d50975a264..40e693355dcd0 100644
--- a/libc/include/llvm-libc-types/mbstate_t.h
+++ b/libc/include/llvm-libc-types/mbstate_t.h
@@ -9,8 +9,12 @@
 #ifndef LLVM_LIBC_TYPES_MBSTATE_T_H
 #define LLVM_LIBC_TYPES_MBSTATE_T_H
 
-// TODO: Complete this once we implement functions that operate on this type.
+#include "../llvm-libc-macros/stdint-macros.h"
+
 typedef struct {
+    uint32_t __field1;
+    uint8_t __field2;
+    uint8_t __field3;
 } mbstate_t;
 
 #endif // LLVM_LIBC_TYPES_MBSTATE_T_H
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 84db73d8f01ea..3c373f8ec34c0 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -150,6 +150,14 @@ functions:
       - type: wchar_t *__restrict
       - type: const wchar_t *__restrict
       - type: size_t
+  - name: wcrtomb
+    standards:
+      - stdc
+    return_type: size_t
+    arguments:
+      - type: char *__restrict
+      - type: wchar_t
+      - type: mbstate_t *__restrict
   - name: wcscpy
     standards:
       - stdc
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index 6715e354e23e5..e4a3bfd7b9c5c 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -19,3 +19,19 @@ add_object_library(
     libc.src.__support.math_extras
     .mbstate
 )
+
+add_object_library(
+  wcrtomb
+  HDRS
+    wcrtomb.h
+  SRCS 
+    wcrtomb.cpp
+  DEPENDS
+    libc.hdr.types.char32_t
+    libc.hdr.types.size_t
+    libc.hdr.types.mbstate_t
+    libc.hdr.types.wchar_t
+    libc.src.__support.error_or
+    .character_converter
+    .mbstate
+)
diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index ca709769616c3..fc8c9aaaa3d50 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -12,6 +12,7 @@
 #include "src/__support/error_or.h"
 #include "src/__support/math_extras.h"
 #include "src/__support/wchar/mbstate.h"
+#include "src/__support/libc_errno.h" // for error numbers
 
 #include "character_converter.h"
 
@@ -51,7 +52,7 @@ int CharacterConverter::push(char32_t utf32) {
   // `utf32` contains a value that is too large to actually represent a valid
   // unicode character
   clear();
-  return -1;
+  return EILSEQ;
 }
 
 ErrorOr<char8_t> CharacterConverter::pop_utf8() {
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
new file mode 100644
index 0000000000000..abd18aa391d75
--- /dev/null
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -0,0 +1,48 @@
+//===-- Implementation of wcrtomb -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/wchar/wcrtomb.h"
+#include "src/__support/error_or.h"
+#include "src/__support/wchar/character_converter.h"
+#include "src/__support/wchar/mbstate.h"
+
+#include "hdr/types/char32_t.h"
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
+                        mbstate_t *__restrict ps) {               
+  CharacterConverter cr((internal::mbstate *)ps);
+
+  char buf[sizeof(wchar_t) / sizeof(char)];
+  if (s == nullptr) {
+    s = buf;
+    wc = L'\0';
+  }
+
+  int status = cr.push((char32_t)wc);
+  if (status != 0)
+    return Error(status);
+
+  size_t count = 0;
+  while (!cr.isComplete()) {
+    auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded
+    *s = utf8.value();
+    s++;
+    count++;
+  }
+  return count;
+}
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/wcrtomb.h b/libc/src/__support/wchar/wcrtomb.h
new file mode 100644
index 0000000000000..35a472548b67a
--- /dev/null
+++ b/libc/src/__support/wchar/wcrtomb.h
@@ -0,0 +1,27 @@
+//===-- Implementation header for wcrtomb ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
+#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/error_or.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
+                        mbstate_t *__restrict ps);
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 491dd5b34340a..a106a8ee9aa41 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -34,6 +34,19 @@ add_entrypoint_object(
     libc.src.__support.wctype_utils
 )
 
+add_entrypoint_object(
+  wcrtomb
+  SRCS
+    wcrtomb.cpp
+  HDRS
+    wcrtomb.h
+  DEPENDS
+    libc.hdr.types.wchar_t
+    libc.hdr.types.mbstate_t
+    libc.src.__support.libc_errno
+    libc.src.__support.wchar.wcrtomb
+)
+
 add_entrypoint_object(
   wmemset
   SRCS
diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp
new file mode 100644
index 0000000000000..82c813747e0f6
--- /dev/null
+++ b/libc/src/wchar/wcrtomb.cpp
@@ -0,0 +1,34 @@
+//===-- Implementation of wcrtomb -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcrtomb.h"
+
+#include "hdr/types/mbstate_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/wcrtomb.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(size_t, wcrtomb,
+                   (char *__restrict s, wchar_t wc, mbstate_t *__restrict ps)) {
+  static mbstate_t internal_mbstate{0, 0, 0};
+
+  auto result =
+      internal::wcrtomb(s, wc, ps == nullptr ? &internal_mbstate : ps);
+
+  if (!result.has_value()) {
+    libc_errno = result.error();
+    return -1;
+  }
+
+  return result.value();
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcrtomb.h b/libc/src/wchar/wcrtomb.h
new file mode 100644
index 0000000000000..3cfb1a6f2dc84
--- /dev/null
+++ b/libc/src/wchar/wcrtomb.h
@@ -0,0 +1,23 @@
+//===-- Implementation header for wcrtomb -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
+#define LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
+
+#include "hdr/types/wchar_t.h"
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t wcrtomb(char *__restrict s, wchar_t wc, mbstate_t *__restrict ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 4990b6953348b..c4f51b5f94a12 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -33,6 +33,20 @@ add_libc_test(
     libc.src.wchar.wctob
 )
 
+add_libc_test(
+  wcrtomb_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcrtomb_test.cpp
+  DEPENDS
+    libc.src.wchar.wcrtomb
+    libc.src.string.memset
+    libc.hdr.types.wchar_t
+    libc.hdr.types.mbstate_t
+    libc.src.__support.libc_errno
+)
+
 add_libc_test(
   wmemset_test 
   SUITE
diff --git a/libc/test/src/wchar/wcrtomb_test.cpp b/libc/test/src/wchar/wcrtomb_test.cpp
new file mode 100644
index 0000000000000..be249f4f6dae4
--- /dev/null
+++ b/libc/test/src/wchar/wcrtomb_test.cpp
@@ -0,0 +1,93 @@
+//===-- Unittests for wcrtomb --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "hdr/types/mbstate_t.h"
+#include "src/wchar/wcrtomb.h"
+#include "src/string/memset.h"
+#include "test/UnitTest/Test.h"
+#include "src/__support/libc_errno.h"
+
+TEST(LlvmLibcWCRToMBTest, OneByte) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+  wchar_t wc = L'U';
+  char mb[4];
+  size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
+  ASSERT_EQ(cnt, static_cast<size_t>(1));
+  ASSERT_EQ(mb[0], 'U');
+}
+
+TEST(LlvmLibcWCRToMBTest, TwoByte) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+  // testing utf32: 0xff -> utf8: 0xc3 0xbf
+  wchar_t wc = 0xff;
+  char mb[4];
+  size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
+  ASSERT_EQ(cnt, static_cast<size_t>(2));
+  ASSERT_EQ(mb[0], static_cast<char>(0xc3));
+  ASSERT_EQ(mb[1], static_cast<char>(0xbf));
+}
+
+TEST(LlvmLibcWCRToMBTest, ThreeByte) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+  // testing utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
+  wchar_t wc = 0xac15;
+  char mb[4];
+  size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
+  ASSERT_EQ(cnt, static_cast<size_t>(3));
+  ASSERT_EQ(mb[0], static_cast<char>(0xea));
+  ASSERT_EQ(mb[1], static_cast<char>(0xb0));
+  ASSERT_EQ(mb[2], static_cast<char>(0x95));
+}
+
+TEST(LlvmLibcWCRToMBTest, FourByte) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+  // testing utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1
+  wchar_t wc = 0x1f921;
+  char mb[4];
+  size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
+  ASSERT_EQ(cnt, static_cast<size_t>(4));
+  ASSERT_EQ(mb[0], static_cast<char>(0xf0));
+  ASSERT_EQ(mb[1], static_cast<char>(0x9f));
+  ASSERT_EQ(mb[2], static_cast<char>(0xa4));
+  ASSERT_EQ(mb[3], static_cast<char>(0xa1));
+}
+
+TEST(LlvmLibcWCRToMBTest, NullString) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+  wchar_t wc = L'A';
+  char mb[4];
+
+  // should be equivalent to the call wcrtomb(buf, L'\0', state)
+  size_t cnt1 = LIBC_NAMESPACE::wcrtomb(nullptr, wc, &state);
+  size_t cnt2 = LIBC_NAMESPACE::wcrtomb(mb, L'\0', &state);
+
+  ASSERT_EQ(cnt1, cnt2);
+}
+
+TEST(LlvmLibcWCRToMBTest, NullState) {
+  wchar_t wc = L'A';
+  char mb[4];
+  size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, nullptr);
+  ASSERT_EQ(cnt, static_cast<size_t>(1));
+}
+
+TEST(LlvmLibcWCRToMBTest, InvalidWchar) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+  wchar_t wc = 0x12ffff;
+  char mb[4];
+  size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
+  ASSERT_EQ(cnt, static_cast<size_t>(-1));
+  ASSERT_EQ(static_cast<int>(libc_errno), EILSEQ);
+}

>From bb072dff5747e5e110d08838c57f846e56420634 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 17 Jun 2025 20:21:14 +0000
Subject: [PATCH 02/11] formatting

---
 libc/include/llvm-libc-types/mbstate_t.h       |  6 +++---
 .../__support/wchar/character_converter.cpp    |  2 +-
 libc/src/__support/wchar/wcrtomb.cpp           |  2 +-
 libc/test/src/wchar/wcrtomb_test.cpp           | 18 +++++++++---------
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/libc/include/llvm-libc-types/mbstate_t.h b/libc/include/llvm-libc-types/mbstate_t.h
index 40e693355dcd0..009fe57da50e2 100644
--- a/libc/include/llvm-libc-types/mbstate_t.h
+++ b/libc/include/llvm-libc-types/mbstate_t.h
@@ -12,9 +12,9 @@
 #include "../llvm-libc-macros/stdint-macros.h"
 
 typedef struct {
-    uint32_t __field1;
-    uint8_t __field2;
-    uint8_t __field3;
+  uint32_t __field1;
+  uint8_t __field2;
+  uint8_t __field3;
 } mbstate_t;
 
 #endif // LLVM_LIBC_TYPES_MBSTATE_T_H
diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index fc8c9aaaa3d50..61b3a4abd24fd 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -10,9 +10,9 @@
 #include "hdr/types/char8_t.h"
 #include "src/__support/common.h"
 #include "src/__support/error_or.h"
+#include "src/__support/libc_errno.h" // for error numbers
 #include "src/__support/math_extras.h"
 #include "src/__support/wchar/mbstate.h"
-#include "src/__support/libc_errno.h" // for error numbers
 
 #include "character_converter.h"
 
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
index abd18aa391d75..847ffd162afc0 100644
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
 ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
-                        mbstate_t *__restrict ps) {               
+                        mbstate_t *__restrict ps) {
   CharacterConverter cr((internal::mbstate *)ps);
 
   char buf[sizeof(wchar_t) / sizeof(char)];
diff --git a/libc/test/src/wchar/wcrtomb_test.cpp b/libc/test/src/wchar/wcrtomb_test.cpp
index be249f4f6dae4..c06b39ae0143f 100644
--- a/libc/test/src/wchar/wcrtomb_test.cpp
+++ b/libc/test/src/wchar/wcrtomb_test.cpp
@@ -6,16 +6,16 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "hdr/types/wchar_t.h"
 #include "hdr/types/mbstate_t.h"
-#include "src/wchar/wcrtomb.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/libc_errno.h"
 #include "src/string/memset.h"
+#include "src/wchar/wcrtomb.h"
 #include "test/UnitTest/Test.h"
-#include "src/__support/libc_errno.h"
 
 TEST(LlvmLibcWCRToMBTest, OneByte) {
   mbstate_t state;
-  LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
   wchar_t wc = L'U';
   char mb[4];
   size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
@@ -25,7 +25,7 @@ TEST(LlvmLibcWCRToMBTest, OneByte) {
 
 TEST(LlvmLibcWCRToMBTest, TwoByte) {
   mbstate_t state;
-  LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
   // testing utf32: 0xff -> utf8: 0xc3 0xbf
   wchar_t wc = 0xff;
   char mb[4];
@@ -37,7 +37,7 @@ TEST(LlvmLibcWCRToMBTest, TwoByte) {
 
 TEST(LlvmLibcWCRToMBTest, ThreeByte) {
   mbstate_t state;
-  LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
   // testing utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
   wchar_t wc = 0xac15;
   char mb[4];
@@ -50,7 +50,7 @@ TEST(LlvmLibcWCRToMBTest, ThreeByte) {
 
 TEST(LlvmLibcWCRToMBTest, FourByte) {
   mbstate_t state;
-  LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
   // testing utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1
   wchar_t wc = 0x1f921;
   char mb[4];
@@ -64,7 +64,7 @@ TEST(LlvmLibcWCRToMBTest, FourByte) {
 
 TEST(LlvmLibcWCRToMBTest, NullString) {
   mbstate_t state;
-  LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
   wchar_t wc = L'A';
   char mb[4];
 
@@ -84,7 +84,7 @@ TEST(LlvmLibcWCRToMBTest, NullState) {
 
 TEST(LlvmLibcWCRToMBTest, InvalidWchar) {
   mbstate_t state;
-  LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
   wchar_t wc = 0x12ffff;
   char mb[4];
   size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);

>From a16caf9834ad4001f7410b34ee613230cc3cf43d Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 17 Jun 2025 20:34:25 +0000
Subject: [PATCH 03/11] formatting

---
 libc/src/wchar/wcrtomb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/wchar/wcrtomb.h b/libc/src/wchar/wcrtomb.h
index 3cfb1a6f2dc84..06c42f158122c 100644
--- a/libc/src/wchar/wcrtomb.h
+++ b/libc/src/wchar/wcrtomb.h
@@ -9,9 +9,9 @@
 #ifndef LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
 #define LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
 
-#include "hdr/types/wchar_t.h"
 #include "hdr/types/mbstate_t.h"
 #include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
 #include "src/__support/macros/config.h"
 
 namespace LIBC_NAMESPACE_DECL {

>From fc563be2635bb8247e56ad325da674fe639ffb18 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 17 Jun 2025 20:59:08 +0000
Subject: [PATCH 04/11] fix mbstate_t in overlay mode

---
 libc/hdr/types/mbstate_t.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/libc/hdr/types/mbstate_t.h b/libc/hdr/types/mbstate_t.h
index 1e232af08e4db..15b2614341d7d 100644
--- a/libc/hdr/types/mbstate_t.h
+++ b/libc/hdr/types/mbstate_t.h
@@ -9,6 +9,14 @@
 #ifndef LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
 #define LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
 
+#ifdef LIBC_FULL_BUILD
+
 #include "include/llvm-libc-types/mbstate_t.h"
 
+#else // Overlay mode
+
+#include "hdr/wchar_overlay.h"
+
+#endif // LLVM_LIBC_FULL_BUILD
+
 #endif // LLVM_LIBC_HDR_TYPES_MBSTATE_T_H

>From 094d2c4b8a40076aaa31ef0d32f34aa1579b6450 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 18 Jun 2025 16:46:26 +0000
Subject: [PATCH 05/11] moved libc error code to public function for
 consistency

---
 libc/src/__support/wchar/CMakeLists.txt          | 1 +
 libc/src/__support/wchar/character_converter.cpp | 3 +--
 libc/src/__support/wchar/wcrtomb.cpp             | 1 +
 libc/src/wchar/wcrtomb.cpp                       | 2 +-
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index e4a3bfd7b9c5c..38dfd76513be1 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -32,6 +32,7 @@ add_object_library(
     libc.hdr.types.mbstate_t
     libc.hdr.types.wchar_t
     libc.src.__support.error_or
+    libc.src.__support.common
     .character_converter
     .mbstate
 )
diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index 61b3a4abd24fd..ca709769616c3 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -10,7 +10,6 @@
 #include "hdr/types/char8_t.h"
 #include "src/__support/common.h"
 #include "src/__support/error_or.h"
-#include "src/__support/libc_errno.h" // for error numbers
 #include "src/__support/math_extras.h"
 #include "src/__support/wchar/mbstate.h"
 
@@ -52,7 +51,7 @@ int CharacterConverter::push(char32_t utf32) {
   // `utf32` contains a value that is too large to actually represent a valid
   // unicode character
   clear();
-  return EILSEQ;
+  return -1;
 }
 
 ErrorOr<char8_t> CharacterConverter::pop_utf8() {
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
index 847ffd162afc0..43d714d436898 100644
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -24,6 +24,7 @@ ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
                         mbstate_t *__restrict ps) {
   CharacterConverter cr((internal::mbstate *)ps);
 
+  // when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps)
   char buf[sizeof(wchar_t) / sizeof(char)];
   if (s == nullptr) {
     s = buf;
diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp
index 82c813747e0f6..5917769af9d4e 100644
--- a/libc/src/wchar/wcrtomb.cpp
+++ b/libc/src/wchar/wcrtomb.cpp
@@ -24,7 +24,7 @@ LLVM_LIBC_FUNCTION(size_t, wcrtomb,
       internal::wcrtomb(s, wc, ps == nullptr ? &internal_mbstate : ps);
 
   if (!result.has_value()) {
-    libc_errno = result.error();
+    libc_errno = EILSEQ;
     return -1;
   }
 

>From 591882f2b91051a79bd51027952a5938c4a208f1 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 18 Jun 2025 17:16:18 +0000
Subject: [PATCH 06/11] use internal mbstate in internal function; used
 explicit casts

---
 libc/src/__support/wchar/CMakeLists.txt | 1 -
 libc/src/__support/wchar/mbstate.h      | 6 +++---
 libc/src/__support/wchar/wcrtomb.cpp    | 9 +++++----
 libc/src/__support/wchar/wcrtomb.h      | 5 ++---
 libc/src/wchar/CMakeLists.txt           | 1 +
 libc/src/wchar/wcrtomb.cpp              | 9 ++++++---
 6 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index 38dfd76513be1..41ae0cdc00f39 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -29,7 +29,6 @@ add_object_library(
   DEPENDS
     libc.hdr.types.char32_t
     libc.hdr.types.size_t
-    libc.hdr.types.mbstate_t
     libc.hdr.types.wchar_t
     libc.src.__support.error_or
     libc.src.__support.common
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
index fb08fb4eaa188..087d3c0fbc4f0 100644
--- a/libc/src/__support/wchar/mbstate.h
+++ b/libc/src/__support/wchar/mbstate.h
@@ -18,17 +18,17 @@ namespace internal {
 
 struct mbstate {
   // store a partial codepoint (in UTF-32)
-  char32_t partial;
+  char32_t partial = 0;
 
   /*
   Progress towards a conversion
     For utf8  -> utf32, increases with each CharacterConverter::push(utf8_byte)
     For utf32 ->  utf8, increases with each CharacterConverter::pop_utf8()
   */
-  uint8_t bytes_processed;
+  uint8_t bytes_processed = 0;
 
   // Total number of bytes that will be needed to represent this character
-  uint8_t total_bytes;
+  uint8_t total_bytes = 0;
 };
 
 } // namespace internal
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
index 43d714d436898..cf0db4ee2763b 100644
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -12,7 +12,6 @@
 #include "src/__support/wchar/mbstate.h"
 
 #include "hdr/types/char32_t.h"
-#include "hdr/types/mbstate_t.h"
 #include "hdr/types/size_t.h"
 #include "hdr/types/wchar_t.h"
 #include "src/__support/common.h"
@@ -21,8 +20,10 @@ namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
 ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
-                        mbstate_t *__restrict ps) {
-  CharacterConverter cr((internal::mbstate *)ps);
+                        mbstate *__restrict ps) {
+  static_assert(sizeof(wchar_t) == 4);
+
+  CharacterConverter cr(ps);
 
   // when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps)
   char buf[sizeof(wchar_t) / sizeof(char)];
@@ -31,7 +32,7 @@ ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
     wc = L'\0';
   }
 
-  int status = cr.push((char32_t)wc);
+  int status = cr.push(static_cast<char32_t>(wc));
   if (status != 0)
     return Error(status);
 
diff --git a/libc/src/__support/wchar/wcrtomb.h b/libc/src/__support/wchar/wcrtomb.h
index 35a472548b67a..5e36b76eb01de 100644
--- a/libc/src/__support/wchar/wcrtomb.h
+++ b/libc/src/__support/wchar/wcrtomb.h
@@ -9,17 +9,16 @@
 #ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
 #define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
 
-#include "hdr/types/mbstate_t.h"
 #include "hdr/types/size_t.h"
 #include "hdr/types/wchar_t.h"
 #include "src/__support/error_or.h"
+#include "src/__support/wchar/mbstate.h"
 #include "src/__support/macros/config.h"
 
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
-ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
-                        mbstate_t *__restrict ps);
+ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc, mbstate *__restrict ps);
 
 } // namespace internal
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index a106a8ee9aa41..6fe6da513ba04 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -45,6 +45,7 @@ add_entrypoint_object(
     libc.hdr.types.mbstate_t
     libc.src.__support.libc_errno
     libc.src.__support.wchar.wcrtomb
+    libc.src.__support.wchar.mbstate
 )
 
 add_entrypoint_object(
diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp
index 5917769af9d4e..7951bea2120bc 100644
--- a/libc/src/wchar/wcrtomb.cpp
+++ b/libc/src/wchar/wcrtomb.cpp
@@ -12,16 +12,19 @@
 #include "src/__support/common.h"
 #include "src/__support/libc_errno.h"
 #include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
 #include "src/__support/wchar/wcrtomb.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(size_t, wcrtomb,
                    (char *__restrict s, wchar_t wc, mbstate_t *__restrict ps)) {
-  static mbstate_t internal_mbstate{0, 0, 0};
+  static internal::mbstate internal_mbstate;
 
-  auto result =
-      internal::wcrtomb(s, wc, ps == nullptr ? &internal_mbstate : ps);
+  auto result = internal::wcrtomb(
+      s, wc,
+      ps == nullptr ? &internal_mbstate
+                    : reinterpret_cast<internal::mbstate *>(ps));
 
   if (!result.has_value()) {
     libc_errno = EILSEQ;

>From babc9984281287c08fbfb4bac5cfe82e7e324168 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 18 Jun 2025 17:23:46 +0000
Subject: [PATCH 07/11] formatting

---
 libc/src/__support/wchar/wcrtomb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/__support/wchar/wcrtomb.h b/libc/src/__support/wchar/wcrtomb.h
index 5e36b76eb01de..bcd39a92a3b76 100644
--- a/libc/src/__support/wchar/wcrtomb.h
+++ b/libc/src/__support/wchar/wcrtomb.h
@@ -12,8 +12,8 @@
 #include "hdr/types/size_t.h"
 #include "hdr/types/wchar_t.h"
 #include "src/__support/error_or.h"
-#include "src/__support/wchar/mbstate.h"
 #include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
 
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {

>From 8b1d981deadd330a2318130a4aacd367dab366b9 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 20 Jun 2025 17:21:10 +0000
Subject: [PATCH 08/11] prevent overlaying mbstate_t

---
 libc/hdr/types/mbstate_t.h           | 2 +-
 libc/src/__support/wchar/wcrtomb.cpp | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/libc/hdr/types/mbstate_t.h b/libc/hdr/types/mbstate_t.h
index 15b2614341d7d..ad826d49c14c7 100644
--- a/libc/hdr/types/mbstate_t.h
+++ b/libc/hdr/types/mbstate_t.h
@@ -15,7 +15,7 @@
 
 #else // Overlay mode
 
-#include "hdr/wchar_overlay.h"
+#error "type not available in overlay mode"
 
 #endif // LLVM_LIBC_FULL_BUILD
 
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
index cf0db4ee2763b..fdbbcb5526917 100644
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -15,6 +15,7 @@
 #include "hdr/types/size_t.h"
 #include "hdr/types/wchar_t.h"
 #include "src/__support/common.h"
+#include "src/__support/libc_assert.h"
 
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
@@ -39,6 +40,8 @@ ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
   size_t count = 0;
   while (!cr.isComplete()) {
     auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded
+    LIBC_ASSERT(utf8.has_value());
+    
     *s = utf8.value();
     s++;
     count++;

>From f2be9eafdd1e97955195eb367326e04a28f93ccf Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 20 Jun 2025 17:27:17 +0000
Subject: [PATCH 09/11] move internal buffer to public function

---
 libc/src/__support/wchar/wcrtomb.cpp | 10 +++-------
 libc/src/wchar/wcrtomb.cpp           |  7 +++++++
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
index fdbbcb5526917..b51e4cd7982bf 100644
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -26,12 +26,8 @@ ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
 
   CharacterConverter cr(ps);
 
-  // when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps)
-  char buf[sizeof(wchar_t) / sizeof(char)];
-  if (s == nullptr) {
-    s = buf;
-    wc = L'\0';
-  }
+  if (s == nullptr)
+    return Error(-1);
 
   int status = cr.push(static_cast<char32_t>(wc));
   if (status != 0)
@@ -41,7 +37,7 @@ ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
   while (!cr.isComplete()) {
     auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded
     LIBC_ASSERT(utf8.has_value());
-    
+
     *s = utf8.value();
     s++;
     count++;
diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp
index 7951bea2120bc..45494b65cf275 100644
--- a/libc/src/wchar/wcrtomb.cpp
+++ b/libc/src/wchar/wcrtomb.cpp
@@ -21,6 +21,13 @@ LLVM_LIBC_FUNCTION(size_t, wcrtomb,
                    (char *__restrict s, wchar_t wc, mbstate_t *__restrict ps)) {
   static internal::mbstate internal_mbstate;
 
+  // when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps)
+  char buf[sizeof(wchar_t) / sizeof(char)];
+  if (s == nullptr) {
+    s = buf;
+    wc = L'\0';
+  }
+
   auto result = internal::wcrtomb(
       s, wc,
       ps == nullptr ? &internal_mbstate

>From ddf318383c29352ba4249730f78c810e43c49a0e Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 20 Jun 2025 19:30:47 +0000
Subject: [PATCH 10/11] fixed uninitialized variable

---
 libc/src/__support/wchar/mbstate.h   | 2 +-
 libc/src/__support/wchar/wcrtomb.cpp | 2 +-
 libc/src/wchar/wcrtomb.cpp           | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
index 1fb46818cdfba..32304a5215241 100644
--- a/libc/src/__support/wchar/mbstate.h
+++ b/libc/src/__support/wchar/mbstate.h
@@ -25,7 +25,7 @@ struct mbstate {
     Increases with each push(...) until it reaches total_bytes
     Decreases with each pop(...) until it reaches 0
   */
-  uint8_t bytes_stored;
+  uint8_t bytes_stored = 0;
 
   // Total number of bytes that will be needed to represent this character
   uint8_t total_bytes = 0;
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
index b51e4cd7982bf..8ca3d17ad6ce1 100644
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -34,7 +34,7 @@ ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
     return Error(status);
 
   size_t count = 0;
-  while (!cr.isComplete()) {
+  while (!cr.isEmpty()) {
     auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded
     LIBC_ASSERT(utf8.has_value());
 
diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp
index 45494b65cf275..6d604a00599ee 100644
--- a/libc/src/wchar/wcrtomb.cpp
+++ b/libc/src/wchar/wcrtomb.cpp
@@ -9,6 +9,7 @@
 #include "src/wchar/wcrtomb.h"
 
 #include "hdr/types/mbstate_t.h"
+#include "hdr/types/wchar_t.h"
 #include "src/__support/common.h"
 #include "src/__support/libc_errno.h"
 #include "src/__support/macros/config.h"

>From edf8aea1e415adffa711788f460d75c2df9feaa6 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 20 Jun 2025 19:50:42 +0000
Subject: [PATCH 11/11] updated entrypoints

---
 libc/config/linux/x86_64/entrypoints.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 0373c23db04e5..5674aa50a6051 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -365,7 +365,6 @@ set(TARGET_LIBC_ENTRYPOINTS
 
     # wchar.h entrypoints
     libc.src.wchar.btowc
-    libc.src.wchar.wcrtomb
     libc.src.wchar.wcslen
     libc.src.wchar.wctob
     libc.src.wchar.wmemmove
@@ -1245,6 +1244,9 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.sys.socket.recv
     libc.src.sys.socket.recvfrom
     libc.src.sys.socket.recvmsg
+
+    # wchar entrypoints
+    libc.src.wchar.wcrtomb
   )
 endif()
 



More information about the libc-commits mailing list