[libc-commits] [libc] [libc] Implemented wcrtomb internal function and public libc function (PR #144596)
Uzair Nawaz via libc-commits
libc-commits at lists.llvm.org
Tue Jun 17 13:16:30 PDT 2025
https://github.com/uzairnawaz created https://github.com/llvm/llvm-project/pull/144596
Implemented the internal wcrtomb function using the CharacterConverter class.
The public libc function calls this internal function to perform the conversion.
>From bc5e83f4f84479babd932cc0b2632d028eb47508 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 17 Jun 2025 18:55:20 +0000
Subject: [PATCH] Implemented wcrtomb internal and public function
---
libc/config/linux/x86_64/entrypoints.txt | 1 +
libc/hdr/types/CMakeLists.txt | 8 ++
libc/hdr/types/mbstate_t.h | 14 +++
libc/include/llvm-libc-types/mbstate_t.h | 6 +-
libc/include/wchar.yaml | 8 ++
libc/src/__support/wchar/CMakeLists.txt | 16 ++++
.../__support/wchar/character_converter.cpp | 3 +-
libc/src/__support/wchar/wcrtomb.cpp | 48 ++++++++++
libc/src/__support/wchar/wcrtomb.h | 27 ++++++
libc/src/wchar/CMakeLists.txt | 13 +++
libc/src/wchar/wcrtomb.cpp | 34 +++++++
libc/src/wchar/wcrtomb.h | 23 +++++
libc/test/src/wchar/CMakeLists.txt | 14 +++
libc/test/src/wchar/wcrtomb_test.cpp | 93 +++++++++++++++++++
14 files changed, 306 insertions(+), 2 deletions(-)
create mode 100644 libc/hdr/types/mbstate_t.h
create mode 100644 libc/src/__support/wchar/wcrtomb.cpp
create mode 100644 libc/src/__support/wchar/wcrtomb.h
create mode 100644 libc/src/wchar/wcrtomb.cpp
create mode 100644 libc/src/wchar/wcrtomb.h
create mode 100644 libc/test/src/wchar/wcrtomb_test.cpp
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index aa2079faed409..0373c23db04e5 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -365,6 +365,7 @@ set(TARGET_LIBC_ENTRYPOINTS
# wchar.h entrypoints
libc.src.wchar.btowc
+ libc.src.wchar.wcrtomb
libc.src.wchar.wcslen
libc.src.wchar.wctob
libc.src.wchar.wmemmove
diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt
index c88c357009072..58f500a6343f6 100644
--- a/libc/hdr/types/CMakeLists.txt
+++ b/libc/hdr/types/CMakeLists.txt
@@ -20,6 +20,14 @@ add_proxy_header_library(
libc.include.uchar
)
+# Proxy header target for the mbstate_t type; depends on the llvm-libc-types
+# definition of mbstate_t.
+add_proxy_header_library(
+ mbstate_t
+ HDRS
+ mbstate_t.h
+ DEPENDS
+ libc.include.llvm-libc-types.mbstate_t
+)
+
add_proxy_header_library(
div_t
HDRS
diff --git a/libc/hdr/types/mbstate_t.h b/libc/hdr/types/mbstate_t.h
new file mode 100644
index 0000000000000..1e232af08e4db
--- /dev/null
+++ b/libc/hdr/types/mbstate_t.h
@@ -0,0 +1,14 @@
+//===-- Proxy header for mbstate_t ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
+#define LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
+
+#include "include/llvm-libc-types/mbstate_t.h"
+
+#endif // LLVM_LIBC_HDR_TYPES_MBSTATE_T_H
diff --git a/libc/include/llvm-libc-types/mbstate_t.h b/libc/include/llvm-libc-types/mbstate_t.h
index 540d50975a264..40e693355dcd0 100644
--- a/libc/include/llvm-libc-types/mbstate_t.h
+++ b/libc/include/llvm-libc-types/mbstate_t.h
@@ -9,8 +9,12 @@
#ifndef LLVM_LIBC_TYPES_MBSTATE_T_H
#define LLVM_LIBC_TYPES_MBSTATE_T_H
-// TODO: Complete this once we implement functions that operate on this type.
+#include "../llvm-libc-macros/stdint-macros.h"
+
+// NOTE(review): the internal wcrtomb implementation casts mbstate_t * to
+// internal::mbstate *, so these fields presumably mirror that type's layout --
+// confirm the two definitions stay in sync.
typedef struct {
+ uint32_t __field1;
+ uint8_t __field2;
+ uint8_t __field3;
} mbstate_t;
#endif // LLVM_LIBC_TYPES_MBSTATE_T_H
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 84db73d8f01ea..3c373f8ec34c0 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -150,6 +150,14 @@ functions:
- type: wchar_t *__restrict
- type: const wchar_t *__restrict
- type: size_t
+ - name: wcrtomb
+ standards:
+ - stdc
+ return_type: size_t
+ arguments:
+ - type: char *__restrict
+ - type: wchar_t
+ - type: mbstate_t *__restrict
- name: wcscpy
standards:
- stdc
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index 6715e354e23e5..e4a3bfd7b9c5c 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -19,3 +19,19 @@ add_object_library(
libc.src.__support.math_extras
.mbstate
)
+
+# Internal wcrtomb implementation, built on the character_converter and
+# mbstate libraries from this directory.
+add_object_library(
+ wcrtomb
+ HDRS
+ wcrtomb.h
+ SRCS
+ wcrtomb.cpp
+ DEPENDS
+ libc.hdr.types.char32_t
+ libc.hdr.types.size_t
+ libc.hdr.types.mbstate_t
+ libc.hdr.types.wchar_t
+ libc.src.__support.error_or
+ .character_converter
+ .mbstate
+)
diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index ca709769616c3..fc8c9aaaa3d50 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -12,6 +12,7 @@
#include "src/__support/error_or.h"
#include "src/__support/math_extras.h"
#include "src/__support/wchar/mbstate.h"
+#include "src/__support/libc_errno.h" // for error numbers
#include "character_converter.h"
@@ -51,7 +52,7 @@ int CharacterConverter::push(char32_t utf32) {
// `utf32` contains a value that is too large to actually represent a valid
// unicode character
clear();
- return -1;
+ return EILSEQ;
}
ErrorOr<char8_t> CharacterConverter::pop_utf8() {
diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp
new file mode 100644
index 0000000000000..abd18aa391d75
--- /dev/null
+++ b/libc/src/__support/wchar/wcrtomb.cpp
@@ -0,0 +1,48 @@
+//===-- Implementation of wcrtomb -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/wchar/wcrtomb.h"
+#include "src/__support/error_or.h"
+#include "src/__support/wchar/character_converter.h"
+#include "src/__support/wchar/mbstate.h"
+
+#include "hdr/types/char32_t.h"
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Converts the wide character `wc` to UTF-8 and stores the resulting bytes
+// through `s`, using `ps` as the conversion state.
+//
+// If `s` is null, the call behaves as if `wc` were L'\0' and the output went
+// to an internal scratch buffer. Returns the number of bytes stored, or the
+// error reported by the converter when `wc` cannot be pushed or a byte cannot
+// be popped.
+ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
+                        mbstate_t *__restrict ps) {
+  // Scratch space sized for the longest UTF-8 sequence rather than for
+  // wchar_t, whose width differs between platforms.
+  constexpr size_t MAX_UTF8_BYTES = 4;
+  char buf[MAX_UTF8_BYTES];
+
+  CharacterConverter cr(reinterpret_cast<internal::mbstate *>(ps));
+
+  if (s == nullptr) {
+    s = buf;
+    wc = L'\0';
+  }
+
+  int status = cr.push(static_cast<char32_t>(wc));
+  if (status != 0)
+    return Error(status);
+
+  size_t count = 0;
+  while (!cr.isComplete()) {
+    auto utf8 = cr.pop_utf8();
+    // Not expected after a successful push, but never read an empty result.
+    if (!utf8.has_value())
+      return Error(utf8.error());
+    *s = static_cast<char>(utf8.value());
+    s++;
+    count++;
+  }
+  return count;
+}
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/wcrtomb.h b/libc/src/__support/wchar/wcrtomb.h
new file mode 100644
index 0000000000000..35a472548b67a
--- /dev/null
+++ b/libc/src/__support/wchar/wcrtomb.h
@@ -0,0 +1,27 @@
+//===-- Implementation header for wcrtomb ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
+#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/error_or.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Converts the wide character `wc` to its multibyte form, writing the bytes
+// through `s` and using `ps` as the conversion state. When `s` is null, the
+// implementation converts L'\0' into an internal buffer instead. Returns the
+// number of bytes written, or an Error holding the converter's error code.
+ErrorOr<size_t> wcrtomb(char *__restrict s, wchar_t wc,
+ mbstate_t *__restrict ps);
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 491dd5b34340a..a106a8ee9aa41 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -34,6 +34,19 @@ add_entrypoint_object(
libc.src.__support.wctype_utils
)
+# Public wcrtomb entrypoint; depends on the internal implementation in
+# libc.src.__support.wchar.wcrtomb and on libc_errno.
+add_entrypoint_object(
+ wcrtomb
+ SRCS
+ wcrtomb.cpp
+ HDRS
+ wcrtomb.h
+ DEPENDS
+ libc.hdr.types.wchar_t
+ libc.hdr.types.mbstate_t
+ libc.src.__support.libc_errno
+ libc.src.__support.wchar.wcrtomb
+)
+
add_entrypoint_object(
wmemset
SRCS
diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp
new file mode 100644
index 0000000000000..82c813747e0f6
--- /dev/null
+++ b/libc/src/wchar/wcrtomb.cpp
@@ -0,0 +1,34 @@
+//===-- Implementation of wcrtomb -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcrtomb.h"
+
+#include "hdr/types/mbstate_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/wcrtomb.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Public wcrtomb entry point: forwards to internal::wcrtomb and translates a
+// failure into libc_errno plus a (size_t)-1 return value.
+LLVM_LIBC_FUNCTION(size_t, wcrtomb,
+                   (char *__restrict s, wchar_t wc, mbstate_t *__restrict ps)) {
+  // Conversion state used whenever the caller passes a null `ps`.
+  static mbstate_t internal_mbstate{0, 0, 0};
+
+  mbstate_t *state = ps;
+  if (state == nullptr)
+    state = &internal_mbstate;
+
+  auto converted = internal::wcrtomb(s, wc, state);
+  if (converted.has_value())
+    return converted.value();
+
+  libc_errno = converted.error();
+  return static_cast<size_t>(-1);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcrtomb.h b/libc/src/wchar/wcrtomb.h
new file mode 100644
index 0000000000000..3cfb1a6f2dc84
--- /dev/null
+++ b/libc/src/wchar/wcrtomb.h
@@ -0,0 +1,23 @@
+//===-- Implementation header for wcrtomb -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
+#define LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
+
+#include "hdr/types/wchar_t.h"
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Converts `wc` to its multibyte form stored through `s`. Returns the number
+// of bytes written, or (size_t)-1 with libc_errno set when the conversion
+// fails. A null `ps` makes the function use its own static conversion state.
+size_t wcrtomb(char *__restrict s, wchar_t wc, mbstate_t *__restrict ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCRTOMB_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 4990b6953348b..c4f51b5f94a12 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -33,6 +33,20 @@ add_libc_test(
libc.src.wchar.wctob
)
+# Unit tests for the public wcrtomb entrypoint.
+add_libc_test(
+ wcrtomb_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wcrtomb_test.cpp
+ DEPENDS
+ libc.src.wchar.wcrtomb
+ libc.src.string.memset
+ libc.hdr.types.wchar_t
+ libc.hdr.types.mbstate_t
+ libc.src.__support.libc_errno
+)
+
add_libc_test(
wmemset_test
SUITE
diff --git a/libc/test/src/wchar/wcrtomb_test.cpp b/libc/test/src/wchar/wcrtomb_test.cpp
new file mode 100644
index 0000000000000..be249f4f6dae4
--- /dev/null
+++ b/libc/test/src/wchar/wcrtomb_test.cpp
@@ -0,0 +1,93 @@
+//===-- Unittests for wcrtomb --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "hdr/types/mbstate_t.h"
+#include "src/wchar/wcrtomb.h"
+#include "src/string/memset.h"
+#include "test/UnitTest/Test.h"
+#include "src/__support/libc_errno.h"
+
+// An ASCII wide character must convert to exactly one identical byte.
+TEST(LlvmLibcWCRToMBTest, OneByte) {
+  mbstate_t ps;
+  LIBC_NAMESPACE::memset(&ps, 0, sizeof(ps));
+  char out[4];
+  const wchar_t input = L'U';
+  const size_t written = LIBC_NAMESPACE::wcrtomb(out, input, &ps);
+  ASSERT_EQ(written, static_cast<size_t>(1));
+  ASSERT_EQ(out[0], 'U');
+}
+
+// utf32 0xff must convert to the two-byte utf8 sequence 0xc3 0xbf.
+TEST(LlvmLibcWCRToMBTest, TwoByte) {
+  const char expected[] = {static_cast<char>(0xc3), static_cast<char>(0xbf)};
+  mbstate_t ps;
+  LIBC_NAMESPACE::memset(&ps, 0, sizeof(ps));
+  char out[4];
+  const wchar_t input = 0xff;
+  const size_t written = LIBC_NAMESPACE::wcrtomb(out, input, &ps);
+  ASSERT_EQ(written, sizeof(expected));
+  for (size_t i = 0; i < sizeof(expected); ++i) {
+    ASSERT_EQ(out[i], expected[i]);
+  }
+}
+
+// utf32 0xac15 must convert to the three-byte utf8 sequence 0xea 0xb0 0x95.
+TEST(LlvmLibcWCRToMBTest, ThreeByte) {
+  const char expected[] = {static_cast<char>(0xea), static_cast<char>(0xb0),
+                           static_cast<char>(0x95)};
+  mbstate_t ps;
+  LIBC_NAMESPACE::memset(&ps, 0, sizeof(ps));
+  char out[4];
+  const wchar_t input = 0xac15;
+  const size_t written = LIBC_NAMESPACE::wcrtomb(out, input, &ps);
+  ASSERT_EQ(written, sizeof(expected));
+  for (size_t i = 0; i < sizeof(expected); ++i) {
+    ASSERT_EQ(out[i], expected[i]);
+  }
+}
+
+// utf32 0x1f921 must convert to the four-byte utf8 sequence
+// 0xf0 0x9f 0xa4 0xa1.
+TEST(LlvmLibcWCRToMBTest, FourByte) {
+  const char expected[] = {static_cast<char>(0xf0), static_cast<char>(0x9f),
+                           static_cast<char>(0xa4), static_cast<char>(0xa1)};
+  mbstate_t ps;
+  LIBC_NAMESPACE::memset(&ps, 0, sizeof(ps));
+  char out[4];
+  const wchar_t input = 0x1f921;
+  const size_t written = LIBC_NAMESPACE::wcrtomb(out, input, &ps);
+  ASSERT_EQ(written, sizeof(expected));
+  for (size_t i = 0; i < sizeof(expected); ++i) {
+    ASSERT_EQ(out[i], expected[i]);
+  }
+}
+
+// A null output pointer must behave like wcrtomb(buf, L'\0', state),
+// regardless of the wide character passed in.
+TEST(LlvmLibcWCRToMBTest, NullString) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+  wchar_t wc = L'A';
+  char mb[4];
+
+  size_t cnt1 = LIBC_NAMESPACE::wcrtomb(nullptr, wc, &state);
+  size_t cnt2 = LIBC_NAMESPACE::wcrtomb(mb, L'\0', &state);
+
+  // Pin the actual value as well: comparing the two counts alone would also
+  // pass if both calls failed with (size_t)-1.
+  ASSERT_EQ(cnt1, static_cast<size_t>(1));
+  ASSERT_EQ(cnt1, cnt2);
+  // The explicit L'\0' conversion must have stored the null byte.
+  ASSERT_EQ(mb[0], '\0');
+}
+
+// Passing a null conversion state must still convert an ASCII character to a
+// single byte.
+TEST(LlvmLibcWCRToMBTest, NullState) {
+  char out[4];
+  const wchar_t input = L'A';
+  const size_t written = LIBC_NAMESPACE::wcrtomb(out, input, nullptr);
+  ASSERT_EQ(written, static_cast<size_t>(1));
+}
+
+// A value that is not a valid wide character must make wcrtomb return
+// (size_t)-1 and set libc_errno to EILSEQ.
+TEST(LlvmLibcWCRToMBTest, InvalidWchar) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+  wchar_t wc = 0x12ffff;
+  char mb[4];
+  // Clear any stale error so the EILSEQ check below can only be satisfied by
+  // this call.
+  libc_errno = 0;
+  size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state);
+  ASSERT_EQ(cnt, static_cast<size_t>(-1));
+  ASSERT_EQ(static_cast<int>(libc_errno), EILSEQ);
+}
More information about the libc-commits
mailing list