[libc-commits] [libc] [libc] Implemented wcrtomb internal function and public libc function (PR #144596)
Uzair Nawaz via libc-commits
libc-commits at lists.llvm.org
Fri Jun 20 14:35:58 PDT 2025
https://github.com/uzairnawaz updated https://github.com/llvm/llvm-project/pull/144596
>From bc5e83f4f84479babd932cc0b2632d028eb47508 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 17 Jun 2025 18:55:20 +0000
Subject: [PATCH 01/11] Implemented wcrtomb internal and public function
---
libc/config/linux/x86_64/entrypoints.txt | 1 +
libc/hdr/types/CMakeLists.txt | 8 ++
libc/hdr/types/mbstate_t.h | 14 +++
libc/include/llvm-libc-types/mbstate_t.h | 6 +-
libc/include/wchar.yaml | 8 ++
libc/src/__support/wchar/CMakeLists.txt | 16 ++++
.../__support/wchar/character_converter.cpp | 3 +-
libc/src/__support/wchar/wcrtomb.cpp | 48 ++++++++++
libc/src/__support/wchar/wcrtomb.h | 27 ++++++
libc/src/wchar/CMakeLists.txt | 13 +++
libc/src/wchar/wcrtomb.cpp | 34 +++++++
libc/src/wchar/wcrtomb.h | 23 +++++
libc/test/src/wchar/CMakeLists.txt | 14 +++
libc/test/src/wchar/wcrtomb_test.cpp | 93 +++++++++++++++++++
14 files changed, 306 insertions(+), 2 deletions(-)
create mode 100644 libc/hdr/types/mbstate_t.h
create mode 100644 libc/src/__support/wchar/wcrtomb.cpp
create mode 100644 libc/src/__support/wchar/wcrtomb.h
create mode 100644 libc/src/wchar/wcrtomb.cpp
create mode 100644 libc/src/wchar/wcrtomb.h
create mode 100644 libc/test/src/wchar/wcrtomb_test.cpp
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index aa2079faed409..0373c23db04e5 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -365,6 +365,7 @@ set(TARGET_LIBC_ENTRYPOINTS
# wchar.h entrypoints
libc.src.wchar.btowc
+ libc.src.wchar.wcrtomb
libc.src.wchar.wcslen
libc.src.wchar.wctob
libc.src.wchar.wmemmove
diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt
index c88c357009072..58f500a6343f6 100644
--- a/libc/hdr/types/CMakeLists.txt
+++ b/libc/hdr/types/CMakeLists.txt
@@ -20,6 +20,14 @@ add_proxy_header_library(
libc.include.uchar
)
+add_proxy_header_library(
+ mbstate_t
+ HDRS
+ mbstate_t.h
+ DEPENDS
+ libc.include.llvm-libc-types.mbstate_t
+)
+
add_proxy_header_library(
div_t
HDRS
diff --git a/libc/hdr/types/mbstate_t.h b/libc/hdr/types/mbstate_t.h
new file mode 100644
index 0000000000000..1e232af08e4db
--- /dev/null
+++ b/libc/hdr/types/mbstate_t.h
@@ -0,0 +1,14 @@
+//===-- Proxy for mbstate_t -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
+#define LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
+
+#include "include/llvm-libc-types/mbstate_t.h"
+
+#endif // LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
diff --git a/libc/include/llvm-libc-types/mbstate_t.h b/libc/include/llvm-libc-types/mbstate_t.h
index 540d50975a264..40e693355dcd0 100644
--- a/libc/include/llvm-libc-types/mbstate_t.h
+++ b/libc/include/llvm-libc-types/mbstate_t.h
@@ -9,8 +9,12 @@
#ifndef LLVM_LIBC_TYPES_MBSTATE_T_H
#define LLVM_LIBC_TYPES_MBSTATE_T_H
-// TODO: Complete this once we implement functions that operate on this type.
+#include "../llvm-libc-macros/stdint-macros.h"
+
typedef struct {
+ uint32_t __field1;
+ uint8_t __field2;
+ uint8_t __field3;
} mbstate_t;
#endif // LLVM_LIBC_TYPES_MBSTATE_T_H
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 84db73d8f01ea..3c373f8ec34c0 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -150,6 +150,14 @@ functions:
- type: wchar_t *__restrict
- type: const wchar_t *__restrict
- type: size_t
+ - name: wcrtomb
+ standards:
+ - stdc
+ return_type: size_t
+ arguments:
+ - type: char *__restrict
+ - type: wchar_t
+ - type: mbstate_t *__restrict
- name: wcscpy
standards:
- stdc
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index 6715e354e23e5..e4a3bfd7b9c5c 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -19,3 +19,19 @@ add_object_library(
libc.src.__support.math_extras
.mbstate
)
+
+add_object_library(
+ wcrtomb
+ HDRS
+ wcrtomb.h
+ SRCS
+ wcrtomb.cpp
+ DEPENDS
+ libc.hdr.types.char32_t
+ libc.hdr.types.size_t
+ libc.hdr.types.mbstate_t
+ libc.hdr.types.wchar_t
+ libc.src.__support.error_or
+ .character_converter
+ .mbstate
+)
diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index ca709769616c3..fc8c9aaaa3d50 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -12,6 +12,7 @@
#include "src/__support/error_or.h"
#include "src/__support/math_extras.h"
#include "src/__support/wchar/mbstate.h"
+#include "src/__support/libc_errno.h" // for error numbers
#include "character_converter.h"
@@ -51,7 +52,7 @@ int CharacterConverter::push(char32_t utf32) {
// `utf32` contains a value that is too large to actually represent a valid
// unicode character
clear();
- return -1;
+ return EILSEQ;
}
ErrorOr<char8_t> CharacterConverter::pop_utf8() {
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
new file mode 100644
index 0000000000000..abd18aa391d75
--- /dev/null
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -0,0 +1,48 @@
+//===-- Implementation of wcrtomb -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/wchar/wcrtomb.h"
+#include "src/__support/error_or.h"
+#include "src/__support/wchar/character_converter.h"
+#include "src/__support/wchar/mbstate.h"
+
+#include "hdr/types/char32_t.h"
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
+ mbstate_t *__restrict ps) {
+ CharacterConverter cr((internal::mbstate *)ps);
+
+ char buf[sizeof(wchar_t) / sizeof(char)];
+ if (s == nullptr) {
+ s = buf;
+ wc = L'\0';
+ }
+
+ int status = cr.push((char32_t)wc);
+ if (status != 0)
+ return Error(status);
+
+ size_t count = 0;
+ while (!cr.isComplete()) {
+ auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded
+ *s = utf8.value();
+ s++;
+ count++;
+ }
+ return count;
+}
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/wcrtomb.h b/libc/src/__support/wchar/wcrtomb.h
new file mode 100644
index 0000000000000..35a472548b67a
--- /dev/null
+++ b/libc/src/__support/wchar/wcrtomb.h
@@ -0,0 +1,27 @@
+//===-- Implementation header for wcrtomb ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
+#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/error_or.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
+ mbstate_t *__restrict ps);
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 491dd5b34340a..a106a8ee9aa41 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -34,6 +34,19 @@ add_entrypoint_object(
libc.src.__support.wctype_utils
)
+add_entrypoint_object(
+ wcrtomb
+ SRCS
+ wcrtomb.cpp
+ HDRS
+ wcrtomb.h
+ DEPENDS
+ libc.hdr.types.wchar_t
+ libc.hdr.types.mbstate_t
+ libc.src.__support.libc_errno
+ libc.src.__support.wchar.wcrtomb
+)
+
add_entrypoint_object(
wmemset
SRCS
diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp
new file mode 100644
index 0000000000000..82c813747e0f6
--- /dev/null
+++ b/libc/src/wchar/wcrtomb.cpp
@@ -0,0 +1,34 @@
+//===-- Implementation of wcrtomb -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcrtomb.h"
+
+#include "hdr/types/mbstate_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/wcrtomb.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(size_t, wcrtomb,
+ (char *__restrict s, wchar_t wc, mbstate_t *__restrict ps)) {
+ static mbstate_t internal_mbstate{0, 0, 0};
+
+ auto result =
+ internal::wcrtomb(s, wc, ps == nullptr ? &internal_mbstate : ps);
+
+ if (!result.has_value()) {
+ libc_errno = result.error();
+ return -1;
+ }
+
+ return result.value();
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcrtomb.h b/libc/src/wchar/wcrtomb.h
new file mode 100644
index 0000000000000..3cfb1a6f2dc84
--- /dev/null
+++ b/libc/src/wchar/wcrtomb.h
@@ -0,0 +1,23 @@
+//===-- Implementation header for wcrtomb -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
+#define LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
+
+#include "hdr/types/wchar_t.h"
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t wcrtomb(char *__restrict s, wchar_t wc, mbstate_t *__restrict ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 4990b6953348b..c4f51b5f94a12 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -33,6 +33,20 @@ add_libc_test(
libc.src.wchar.wctob
)
+add_libc_test(
+ wcrtomb_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wcrtomb_test.cpp
+ DEPENDS
+ libc.src.wchar.wcrtomb
+ libc.src.string.memset
+ libc.hdr.types.wchar_t
+ libc.hdr.types.mbstate_t
+ libc.src.__support.libc_errno
+)
+
add_libc_test(
wmemset_test
SUITE
diff --git a/libc/test/src/wchar/wcrtomb_test.cpp b/libc/test/src/wchar/wcrtomb_test.cpp
new file mode 100644
index 0000000000000..be249f4f6dae4
--- /dev/null
+++ b/libc/test/src/wchar/wcrtomb_test.cpp
@@ -0,0 +1,93 @@
+//===-- Unittests for wcrtomb --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "hdr/types/mbstate_t.h"
+#include "src/wchar/wcrtomb.h"
+#include "src/string/memset.h"
+#include "test/UnitTest/Test.h"
+#include "src/__support/libc_errno.h"
+
+TEST(LlvmLibcWCRToMBTest, OneByte) {
+ mbstate_t state;
+ LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+ wchar_t wc = L'U';
+ char mb[4];
+ size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
+ ASSERT_EQ(cnt, static_cast<size_t>(1));
+ ASSERT_EQ(mb[0], 'U');
+}
+
+TEST(LlvmLibcWCRToMBTest, TwoByte) {
+ mbstate_t state;
+ LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+ // testing utf32: 0xff -> utf8: 0xc3 0xbf
+ wchar_t wc = 0xff;
+ char mb[4];
+ size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
+ ASSERT_EQ(cnt, static_cast<size_t>(2));
+ ASSERT_EQ(mb[0], static_cast<char>(0xc3));
+ ASSERT_EQ(mb[1], static_cast<char>(0xbf));
+}
+
+TEST(LlvmLibcWCRToMBTest, ThreeByte) {
+ mbstate_t state;
+ LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+ // testing utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
+ wchar_t wc = 0xac15;
+ char mb[4];
+ size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
+ ASSERT_EQ(cnt, static_cast<size_t>(3));
+ ASSERT_EQ(mb[0], static_cast<char>(0xea));
+ ASSERT_EQ(mb[1], static_cast<char>(0xb0));
+ ASSERT_EQ(mb[2], static_cast<char>(0x95));
+}
+
+TEST(LlvmLibcWCRToMBTest, FourByte) {
+ mbstate_t state;
+ LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+ // testing utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1
+ wchar_t wc = 0x1f921;
+ char mb[4];
+ size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
+ ASSERT_EQ(cnt, static_cast<size_t>(4));
+ ASSERT_EQ(mb[0], static_cast<char>(0xf0));
+ ASSERT_EQ(mb[1], static_cast<char>(0x9f));
+ ASSERT_EQ(mb[2], static_cast<char>(0xa4));
+ ASSERT_EQ(mb[3], static_cast<char>(0xa1));
+}
+
+TEST(LlvmLibcWCRToMBTest, NullString) {
+ mbstate_t state;
+ LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+ wchar_t wc = L'A';
+ char mb[4];
+
+ // should be equivalent to the call wcrtomb(buf, L'\0', state)
+ size_t cnt1 = LIBC_NAMESPACE::wcrtomb(nullptr, wc, &state);
+ size_t cnt2 = LIBC_NAMESPACE::wcrtomb(mb, L'\0', &state);
+
+ ASSERT_EQ(cnt1, cnt2);
+}
+
+TEST(LlvmLibcWCRToMBTest, NullState) {
+ wchar_t wc = L'A';
+ char mb[4];
+ size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, nullptr);
+ ASSERT_EQ(cnt, static_cast<size_t>(1));
+}
+
+TEST(LlvmLibcWCRToMBTest, InvalidWchar) {
+ mbstate_t state;
+ LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+ wchar_t wc = 0x12ffff;
+ char mb[4];
+ size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
+ ASSERT_EQ(cnt, static_cast<size_t>(-1));
+ ASSERT_EQ(static_cast<int>(libc_errno), EILSEQ);
+}
>From bb072dff5747e5e110d08838c57f846e56420634 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 17 Jun 2025 20:21:14 +0000
Subject: [PATCH 02/11] formatting
---
libc/include/llvm-libc-types/mbstate_t.h | 6 +++---
.../__support/wchar/character_converter.cpp | 2 +-
libc/src/__support/wchar/wcrtomb.cpp | 2 +-
libc/test/src/wchar/wcrtomb_test.cpp | 18 +++++++++---------
4 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/libc/include/llvm-libc-types/mbstate_t.h b/libc/include/llvm-libc-types/mbstate_t.h
index 40e693355dcd0..009fe57da50e2 100644
--- a/libc/include/llvm-libc-types/mbstate_t.h
+++ b/libc/include/llvm-libc-types/mbstate_t.h
@@ -12,9 +12,9 @@
#include "../llvm-libc-macros/stdint-macros.h"
typedef struct {
- uint32_t __field1;
- uint8_t __field2;
- uint8_t __field3;
+ uint32_t __field1;
+ uint8_t __field2;
+ uint8_t __field3;
} mbstate_t;
#endif // LLVM_LIBC_TYPES_MBSTATE_T_H
diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index fc8c9aaaa3d50..61b3a4abd24fd 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -10,9 +10,9 @@
#include "hdr/types/char8_t.h"
#include "src/__support/common.h"
#include "src/__support/error_or.h"
+#include "src/__support/libc_errno.h" // for error numbers
#include "src/__support/math_extras.h"
#include "src/__support/wchar/mbstate.h"
-#include "src/__support/libc_errno.h" // for error numbers
#include "character_converter.h"
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
index abd18aa391d75..847ffd162afc0 100644
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace internal {
ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
- mbstate_t *__restrict ps) {
+ mbstate_t *__restrict ps) {
CharacterConverter cr((internal::mbstate *)ps);
char buf[sizeof(wchar_t) / sizeof(char)];
diff --git a/libc/test/src/wchar/wcrtomb_test.cpp b/libc/test/src/wchar/wcrtomb_test.cpp
index be249f4f6dae4..c06b39ae0143f 100644
--- a/libc/test/src/wchar/wcrtomb_test.cpp
+++ b/libc/test/src/wchar/wcrtomb_test.cpp
@@ -6,16 +6,16 @@
//
//===----------------------------------------------------------------------===//
-#include "hdr/types/wchar_t.h"
#include "hdr/types/mbstate_t.h"
-#include "src/wchar/wcrtomb.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/libc_errno.h"
#include "src/string/memset.h"
+#include "src/wchar/wcrtomb.h"
#include "test/UnitTest/Test.h"
-#include "src/__support/libc_errno.h"
TEST(LlvmLibcWCRToMBTest, OneByte) {
mbstate_t state;
- LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+ LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
wchar_t wc = L'U';
char mb[4];
size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
@@ -25,7 +25,7 @@ TEST(LlvmLibcWCRToMBTest, OneByte) {
TEST(LlvmLibcWCRToMBTest, TwoByte) {
mbstate_t state;
- LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+ LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
// testing utf32: 0xff -> utf8: 0xc3 0xbf
wchar_t wc = 0xff;
char mb[4];
@@ -37,7 +37,7 @@ TEST(LlvmLibcWCRToMBTest, TwoByte) {
TEST(LlvmLibcWCRToMBTest, ThreeByte) {
mbstate_t state;
- LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+ LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
// testing utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
wchar_t wc = 0xac15;
char mb[4];
@@ -50,7 +50,7 @@ TEST(LlvmLibcWCRToMBTest, ThreeByte) {
TEST(LlvmLibcWCRToMBTest, FourByte) {
mbstate_t state;
- LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+ LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
// testing utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1
wchar_t wc = 0x1f921;
char mb[4];
@@ -64,7 +64,7 @@ TEST(LlvmLibcWCRToMBTest, FourByte) {
TEST(LlvmLibcWCRToMBTest, NullString) {
mbstate_t state;
- LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+ LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
wchar_t wc = L'A';
char mb[4];
@@ -84,7 +84,7 @@ TEST(LlvmLibcWCRToMBTest, NullState) {
TEST(LlvmLibcWCRToMBTest, InvalidWchar) {
mbstate_t state;
- LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t));
+ LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
wchar_t wc = 0x12ffff;
char mb[4];
size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
>From a16caf9834ad4001f7410b34ee613230cc3cf43d Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 17 Jun 2025 20:34:25 +0000
Subject: [PATCH 03/11] formatting
---
libc/src/wchar/wcrtomb.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libc/src/wchar/wcrtomb.h b/libc/src/wchar/wcrtomb.h
index 3cfb1a6f2dc84..06c42f158122c 100644
--- a/libc/src/wchar/wcrtomb.h
+++ b/libc/src/wchar/wcrtomb.h
@@ -9,9 +9,9 @@
#ifndef LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
#define LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
-#include "hdr/types/wchar_t.h"
#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE_DECL {
>From fc563be2635bb8247e56ad325da674fe639ffb18 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 17 Jun 2025 20:59:08 +0000
Subject: [PATCH 04/11] fix mbstate_t in overlay mode
---
libc/hdr/types/mbstate_t.h | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/libc/hdr/types/mbstate_t.h b/libc/hdr/types/mbstate_t.h
index 1e232af08e4db..15b2614341d7d 100644
--- a/libc/hdr/types/mbstate_t.h
+++ b/libc/hdr/types/mbstate_t.h
@@ -9,6 +9,14 @@
#ifndef LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
#define LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
+#ifdef LIBC_FULL_BUILD
+
#include "include/llvm-libc-types/mbstate_t.h"
+#else // Overlay mode
+
+#include "hdr/wchar_overlay.h"
+
+#endif // LLVM_LIBC_FULL_BUILD
+
#endif // LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
>From 094d2c4b8a40076aaa31ef0d32f34aa1579b6450 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 18 Jun 2025 16:46:26 +0000
Subject: [PATCH 05/11] moved libc error code to public function for
consistency
---
libc/src/__support/wchar/CMakeLists.txt | 1 +
libc/src/__support/wchar/character_converter.cpp | 3 +--
libc/src/__support/wchar/wcrtomb.cpp | 1 +
libc/src/wchar/wcrtomb.cpp | 2 +-
4 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index e4a3bfd7b9c5c..38dfd76513be1 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -32,6 +32,7 @@ add_object_library(
libc.hdr.types.mbstate_t
libc.hdr.types.wchar_t
libc.src.__support.error_or
+ libc.src.__support.common
.character_converter
.mbstate
)
diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index 61b3a4abd24fd..ca709769616c3 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -10,7 +10,6 @@
#include "hdr/types/char8_t.h"
#include "src/__support/common.h"
#include "src/__support/error_or.h"
-#include "src/__support/libc_errno.h" // for error numbers
#include "src/__support/math_extras.h"
#include "src/__support/wchar/mbstate.h"
@@ -52,7 +51,7 @@ int CharacterConverter::push(char32_t utf32) {
// `utf32` contains a value that is too large to actually represent a valid
// unicode character
clear();
- return EILSEQ;
+ return -1;
}
ErrorOr<char8_t> CharacterConverter::pop_utf8() {
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
index 847ffd162afc0..43d714d436898 100644
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -24,6 +24,7 @@ ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
mbstate_t *__restrict ps) {
CharacterConverter cr((internal::mbstate *)ps);
+ // when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps)
char buf[sizeof(wchar_t) / sizeof(char)];
if (s == nullptr) {
s = buf;
diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp
index 82c813747e0f6..5917769af9d4e 100644
--- a/libc/src/wchar/wcrtomb.cpp
+++ b/libc/src/wchar/wcrtomb.cpp
@@ -24,7 +24,7 @@ LLVM_LIBC_FUNCTION(size_t, wcrtomb,
internal::wcrtomb(s, wc, ps == nullptr ? &internal_mbstate : ps);
if (!result.has_value()) {
- libc_errno = result.error();
+ libc_errno = EILSEQ;
return -1;
}
>From 591882f2b91051a79bd51027952a5938c4a208f1 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 18 Jun 2025 17:16:18 +0000
Subject: [PATCH 06/11] use internal mbstate in internal function; used
explicit casts
---
libc/src/__support/wchar/CMakeLists.txt | 1 -
libc/src/__support/wchar/mbstate.h | 6 +++---
libc/src/__support/wchar/wcrtomb.cpp | 9 +++++----
libc/src/__support/wchar/wcrtomb.h | 5 ++---
libc/src/wchar/CMakeLists.txt | 1 +
libc/src/wchar/wcrtomb.cpp | 9 ++++++---
6 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index 38dfd76513be1..41ae0cdc00f39 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -29,7 +29,6 @@ add_object_library(
DEPENDS
libc.hdr.types.char32_t
libc.hdr.types.size_t
- libc.hdr.types.mbstate_t
libc.hdr.types.wchar_t
libc.src.__support.error_or
libc.src.__support.common
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
index fb08fb4eaa188..087d3c0fbc4f0 100644
--- a/libc/src/__support/wchar/mbstate.h
+++ b/libc/src/__support/wchar/mbstate.h
@@ -18,17 +18,17 @@ namespace internal {
struct mbstate {
// store a partial codepoint (in UTF-32)
- char32_t partial;
+ char32_t partial = 0;
/*
Progress towards a conversion
For utf8 -> utf32, increases with each CharacterConverter::push(utf8_byte)
For utf32 -> utf8, increases with each CharacterConverter::pop_utf8()
*/
- uint8_t bytes_processed;
+ uint8_t bytes_processed = 0;
// Total number of bytes that will be needed to represent this character
- uint8_t total_bytes;
+ uint8_t total_bytes = 0;
};
} // namespace internal
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
index 43d714d436898..cf0db4ee2763b 100644
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -12,7 +12,6 @@
#include "src/__support/wchar/mbstate.h"
#include "hdr/types/char32_t.h"
-#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
@@ -21,8 +20,10 @@ namespace LIBC_NAMESPACE_DECL {
namespace internal {
ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
- mbstate_t *__restrict ps) {
- CharacterConverter cr((internal::mbstate *)ps);
+ mbstate *__restrict ps) {
+ static_assert(sizeof(wchar_t) == 4);
+
+ CharacterConverter cr(ps);
// when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps)
char buf[sizeof(wchar_t) / sizeof(char)];
@@ -31,7 +32,7 @@ ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
wc = L'\0';
}
- int status = cr.push((char32_t)wc);
+ int status = cr.push(static_cast<char32_t>(wc));
if (status != 0)
return Error(status);
diff --git a/libc/src/__support/wchar/wcrtomb.h b/libc/src/__support/wchar/wcrtomb.h
index 35a472548b67a..5e36b76eb01de 100644
--- a/libc/src/__support/wchar/wcrtomb.h
+++ b/libc/src/__support/wchar/wcrtomb.h
@@ -9,17 +9,16 @@
#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
-#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/error_or.h"
+#include "src/__support/wchar/mbstate.h"
#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE_DECL {
namespace internal {
-ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
- mbstate_t *__restrict ps);
+ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc, mbstate *__restrict ps);
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index a106a8ee9aa41..6fe6da513ba04 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -45,6 +45,7 @@ add_entrypoint_object(
libc.hdr.types.mbstate_t
libc.src.__support.libc_errno
libc.src.__support.wchar.wcrtomb
+ libc.src.__support.wchar.mbstate
)
add_entrypoint_object(
diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp
index 5917769af9d4e..7951bea2120bc 100644
--- a/libc/src/wchar/wcrtomb.cpp
+++ b/libc/src/wchar/wcrtomb.cpp
@@ -12,16 +12,19 @@
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
#include "src/__support/wchar/wcrtomb.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(size_t, wcrtomb,
(char *__restrict s, wchar_t wc, mbstate_t *__restrict ps)) {
- static mbstate_t internal_mbstate{0, 0, 0};
+ static internal::mbstate internal_mbstate;
- auto result =
- internal::wcrtomb(s, wc, ps == nullptr ? &internal_mbstate : ps);
+ auto result = internal::wcrtomb(
+ s, wc,
+ ps == nullptr ? &internal_mbstate
+ : reinterpret_cast<internal::mbstate *>(ps));
if (!result.has_value()) {
libc_errno = EILSEQ;
>From babc9984281287c08fbfb4bac5cfe82e7e324168 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 18 Jun 2025 17:23:46 +0000
Subject: [PATCH 07/11] formatting
---
libc/src/__support/wchar/wcrtomb.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libc/src/__support/wchar/wcrtomb.h b/libc/src/__support/wchar/wcrtomb.h
index 5e36b76eb01de..bcd39a92a3b76 100644
--- a/libc/src/__support/wchar/wcrtomb.h
+++ b/libc/src/__support/wchar/wcrtomb.h
@@ -12,8 +12,8 @@
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/error_or.h"
-#include "src/__support/wchar/mbstate.h"
#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
namespace LIBC_NAMESPACE_DECL {
namespace internal {
>From 8b1d981deadd330a2318130a4aacd367dab366b9 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 20 Jun 2025 17:21:10 +0000
Subject: [PATCH 08/11] prevent overlaying mbstate_t
---
libc/hdr/types/mbstate_t.h | 2 +-
libc/src/__support/wchar/wcrtomb.cpp | 3 +++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/libc/hdr/types/mbstate_t.h b/libc/hdr/types/mbstate_t.h
index 15b2614341d7d..ad826d49c14c7 100644
--- a/libc/hdr/types/mbstate_t.h
+++ b/libc/hdr/types/mbstate_t.h
@@ -15,7 +15,7 @@
#else // Overlay mode
-#include "hdr/wchar_overlay.h"
+#error "type not available in overlay mode"
#endif // LLVM_LIBC_FULL_BUILD
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
index cf0db4ee2763b..fdbbcb5526917 100644
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -15,6 +15,7 @@
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
+#include "src/__support/libc_assert.h"
namespace LIBC_NAMESPACE_DECL {
namespace internal {
@@ -39,6 +40,8 @@ ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
size_t count = 0;
while (!cr.isComplete()) {
auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded
+ LIBC_ASSERT(utf8.has_value());
+
*s = utf8.value();
s++;
count++;
>From f2be9eafdd1e97955195eb367326e04a28f93ccf Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 20 Jun 2025 17:27:17 +0000
Subject: [PATCH 09/11] move internal buffer to public function
---
libc/src/__support/wchar/wcrtomb.cpp | 10 +++-------
libc/src/wchar/wcrtomb.cpp | 7 +++++++
2 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
index fdbbcb5526917..b51e4cd7982bf 100644
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -26,12 +26,8 @@ ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
CharacterConverter cr(ps);
- // when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps)
- char buf[sizeof(wchar_t) / sizeof(char)];
- if (s == nullptr) {
- s = buf;
- wc = L'\0';
- }
+ if (s == nullptr)
+ return Error(-1);
int status = cr.push(static_cast<char32_t>(wc));
if (status != 0)
@@ -41,7 +37,7 @@ ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
while (!cr.isComplete()) {
auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded
LIBC_ASSERT(utf8.has_value());
-
+
*s = utf8.value();
s++;
count++;
diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp
index 7951bea2120bc..45494b65cf275 100644
--- a/libc/src/wchar/wcrtomb.cpp
+++ b/libc/src/wchar/wcrtomb.cpp
@@ -21,6 +21,13 @@ LLVM_LIBC_FUNCTION(size_t, wcrtomb,
(char *__restrict s, wchar_t wc, mbstate_t *__restrict ps)) {
static internal::mbstate internal_mbstate;
+ // when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps)
+ char buf[sizeof(wchar_t) / sizeof(char)];
+ if (s == nullptr) {
+ s = buf;
+ wc = L'\0';
+ }
+
auto result = internal::wcrtomb(
s, wc,
ps == nullptr ? &internal_mbstate
>From ddf318383c29352ba4249730f78c810e43c49a0e Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 20 Jun 2025 19:30:47 +0000
Subject: [PATCH 10/11] fixed uninitialized variable
---
libc/src/__support/wchar/mbstate.h | 2 +-
libc/src/__support/wchar/wcrtomb.cpp | 2 +-
libc/src/wchar/wcrtomb.cpp | 1 +
3 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
index 1fb46818cdfba..32304a5215241 100644
--- a/libc/src/__support/wchar/mbstate.h
+++ b/libc/src/__support/wchar/mbstate.h
@@ -25,7 +25,7 @@ struct mbstate {
Increases with each push(...) until it reaches total_bytes
Decreases with each pop(...) until it reaches 0
*/
- uint8_t bytes_stored;
+ uint8_t bytes_stored = 0;
// Total number of bytes that will be needed to represent this character
uint8_t total_bytes = 0;
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
index b51e4cd7982bf..8ca3d17ad6ce1 100644
--- a/libc/src/__support/wchar/wcrtomb.cpp
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -34,7 +34,7 @@ ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
return Error(status);
size_t count = 0;
- while (!cr.isComplete()) {
+ while (!cr.isEmpty()) {
auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded
LIBC_ASSERT(utf8.has_value());
diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp
index 45494b65cf275..6d604a00599ee 100644
--- a/libc/src/wchar/wcrtomb.cpp
+++ b/libc/src/wchar/wcrtomb.cpp
@@ -9,6 +9,7 @@
#include "src/wchar/wcrtomb.h"
#include "hdr/types/mbstate_t.h"
+#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
>From edf8aea1e415adffa711788f460d75c2df9feaa6 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 20 Jun 2025 19:50:42 +0000
Subject: [PATCH 11/11] updated entrypoints
---
libc/config/linux/x86_64/entrypoints.txt | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 0373c23db04e5..5674aa50a6051 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -365,7 +365,6 @@ set(TARGET_LIBC_ENTRYPOINTS
# wchar.h entrypoints
libc.src.wchar.btowc
- libc.src.wchar.wcrtomb
libc.src.wchar.wcslen
libc.src.wchar.wctob
libc.src.wchar.wmemmove
@@ -1245,6 +1244,9 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.sys.socket.recv
libc.src.sys.socket.recvfrom
libc.src.sys.socket.recvmsg
+
+ # wchar entrypoints
+ libc.src.wchar.wcrtomb
)
endif()
More information about the libc-commits
mailing list