[libc-commits] [libc] [libc] Implemented wctomb (PR #145554)
Uzair Nawaz via libc-commits
libc-commits at lists.llvm.org
Wed Jun 25 09:12:44 PDT 2025
https://github.com/uzairnawaz updated https://github.com/llvm/llvm-project/pull/145554
>From d0cb767f997eaacd9ad8d34af0b5bbd398c65042 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 24 Jun 2025 17:21:11 +0000
Subject: [PATCH 1/3] implemented wctomb
---
libc/config/linux/x86_64/entrypoints.txt | 1 +
libc/include/wchar.yaml | 7 +++
libc/src/wchar/CMakeLists.txt | 12 +++++
libc/src/wchar/wctomb.cpp | 32 +++++++++++
libc/src/wchar/wctomb.h | 22 ++++++++
libc/test/src/wchar/CMakeLists.txt | 11 ++++
libc/test/src/wchar/wctomb_test.cpp | 68 ++++++++++++++++++++++++
7 files changed, 153 insertions(+)
create mode 100644 libc/src/wchar/wctomb.cpp
create mode 100644 libc/src/wchar/wctomb.h
create mode 100644 libc/test/src/wchar/wctomb_test.cpp
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 4d94f10196fd7..60f70fcba2448 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1248,6 +1248,7 @@ if(LLVM_LIBC_FULL_BUILD)
# wchar.h entrypoints
libc.src.wchar.mbrtowc
libc.src.wchar.wcrtomb
+ libc.src.wchar.wctomb
)
endif()
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 64eb381710668..9c798b101b53f 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -167,6 +167,13 @@ functions:
- type: char *__restrict
- type: wchar_t
- type: mbstate_t *__restrict
+ - name: wctomb
+ standards:
+ - stdc
+ return_type: int
+ arguments:
+ - type: char *
+ - type: wchar_t
- name: wcscpy
standards:
- stdc
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index ec33caccb16d5..812afab66cf87 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -48,6 +48,18 @@ add_entrypoint_object(
libc.src.__support.wchar.mbstate
)
+add_entrypoint_object(
+ wctomb
+ SRCS
+ wctomb.cpp
+ HDRS
+ wctomb.h
+ DEPENDS
+ libc.hdr.types.wchar_t
+ libc.src.__support.wchar.wcrtomb
+ libc.src.__support.wchar.mbstate
+)
+
add_entrypoint_object(
mbrtowc
SRCS
diff --git a/libc/src/wchar/wctomb.cpp b/libc/src/wchar/wctomb.cpp
new file mode 100644
index 0000000000000..3278051eb196f
--- /dev/null
+++ b/libc/src/wchar/wctomb.cpp
@@ -0,0 +1,32 @@
+//===-- Implementation of wctomb ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wctomb.h"
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcrtomb.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, wctomb, (char *s, wchar_t wc)) {
+ internal::mbstate internal_mbstate;
+ if (s == nullptr)
+ return 0;
+
+ auto result = internal::wcrtomb(s, wc, &internal_mbstate);
+
+ if (!result.has_value()) // invalid wide character
+ return -1;
+
+ return static_cast<int>(result.value());
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wctomb.h b/libc/src/wchar/wctomb.h
new file mode 100644
index 0000000000000..02a34e5ad229f
--- /dev/null
+++ b/libc/src/wchar/wctomb.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for wctomb ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCTOMB_H
+#define LLVM_LIBC_SRC_WCHAR_WCTOMB_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int wctomb(char *s, wchar_t wc);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCTOMB_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 184e482c895b1..95a4374abf0a2 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -61,6 +61,17 @@ add_libc_test(
libc.src.__support.libc_errno
)
+add_libc_test(
+ wctomb_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wctomb_test.cpp
+ DEPENDS
+ libc.src.wchar.wctomb
+ libc.hdr.types.wchar_t
+)
+
add_libc_test(
wmemset_test
SUITE
diff --git a/libc/test/src/wchar/wctomb_test.cpp b/libc/test/src/wchar/wctomb_test.cpp
new file mode 100644
index 0000000000000..1fcb51b2cb583
--- /dev/null
+++ b/libc/test/src/wchar/wctomb_test.cpp
@@ -0,0 +1,68 @@
+//===-- Unittests for wctomb ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wctomb.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcWCToMBTest, OneByte) {
+ wchar_t wc = L'U';
+ char mb[4];
+ int cnt = LIBC_NAMESPACE::wctomb(mb, wc);
+ ASSERT_EQ(cnt, 1);
+ ASSERT_EQ(mb[0], 'U');
+}
+
+TEST(LlvmLibcWCToMBTest, TwoByte) {
+ // UTF-32 0xff must convert to the two-byte UTF-8 sequence 0xc3 0xbf.
+ wchar_t wc = 0xff;
+ char mb[4];
+ int cnt = LIBC_NAMESPACE::wctomb(mb, wc);
+ ASSERT_EQ(cnt, 2);
+ ASSERT_EQ(mb[0], static_cast<char>(0xc3));
+ ASSERT_EQ(mb[1], static_cast<char>(0xbf));
+}
+
+TEST(LlvmLibcWCToMBTest, ThreeByte) {
+ // UTF-32 0xac15 must convert to the three-byte UTF-8 sequence 0xea 0xb0 0x95.
+ wchar_t wc = 0xac15;
+ char mb[4];
+ int cnt = LIBC_NAMESPACE::wctomb(mb, wc);
+ ASSERT_EQ(cnt, 3);
+ ASSERT_EQ(mb[0], static_cast<char>(0xea));
+ ASSERT_EQ(mb[1], static_cast<char>(0xb0));
+ ASSERT_EQ(mb[2], static_cast<char>(0x95));
+}
+
+TEST(LlvmLibcWCToMBTest, FourByte) {
+ // UTF-32 0x1f921 must convert to four UTF-8 bytes: 0xf0 0x9f 0xa4 0xa1.
+ wchar_t wc = 0x1f921;
+ char mb[4];
+ int cnt = LIBC_NAMESPACE::wctomb(mb, wc);
+ ASSERT_EQ(cnt, 4);
+ ASSERT_EQ(mb[0], static_cast<char>(0xf0));
+ ASSERT_EQ(mb[1], static_cast<char>(0x9f));
+ ASSERT_EQ(mb[2], static_cast<char>(0xa4));
+ ASSERT_EQ(mb[3], static_cast<char>(0xa1));
+}
+
+TEST(LlvmLibcWCToMBTest, NullString) {
+ wchar_t wc = L'A';
+
+ int cnt = LIBC_NAMESPACE::wctomb(nullptr, wc);
+
+ // Null s only queries state dependence; the encoding has none, so expect 0.
+ ASSERT_EQ(cnt, 0);
+}
+
+TEST(LlvmLibcWCToMBTest, InvalidWchar) {
+ wchar_t wc = 0x12ffff;
+ char mb[4];
+ int cnt = LIBC_NAMESPACE::wctomb(mb, wc);
+ ASSERT_EQ(cnt, -1);
+}
>From e8a379402cc124f56aa750bb9571320a89becc7f Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 24 Jun 2025 17:35:47 +0000
Subject: [PATCH 2/3] added errno handling
---
libc/src/wchar/CMakeLists.txt | 1 +
libc/src/wchar/wctomb.cpp | 5 ++++-
libc/test/src/wchar/wctomb_test.cpp | 5 +++++
3 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 812afab66cf87..51d9de4534893 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -58,6 +58,7 @@ add_entrypoint_object(
libc.hdr.types.wchar_t
libc.src.__support.wchar.wcrtomb
libc.src.__support.wchar.mbstate
+ libc.src.__support.libc_errno
)
add_entrypoint_object(
diff --git a/libc/src/wchar/wctomb.cpp b/libc/src/wchar/wctomb.cpp
index 3278051eb196f..a660169ab04fc 100644
--- a/libc/src/wchar/wctomb.cpp
+++ b/libc/src/wchar/wctomb.cpp
@@ -10,6 +10,7 @@
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbstate.h"
#include "src/__support/wchar/wcrtomb.h"
@@ -23,8 +24,10 @@ LLVM_LIBC_FUNCTION(int, wctomb, (char *s, wchar_t wc)) {
auto result = internal::wcrtomb(s, wc, &internal_mbstate);
- if (!result.has_value()) // invalid wide character
+ if (!result.has_value()) { // invalid wide character
+ libc_errno = EILSEQ;
return -1;
+ }
return static_cast<int>(result.value());
}
diff --git a/libc/test/src/wchar/wctomb_test.cpp b/libc/test/src/wchar/wctomb_test.cpp
index 1fcb51b2cb583..09fbf52806224 100644
--- a/libc/test/src/wchar/wctomb_test.cpp
+++ b/libc/test/src/wchar/wctomb_test.cpp
@@ -7,9 +7,13 @@
//===----------------------------------------------------------------------===//
#include "hdr/types/wchar_t.h"
+#include "src/__support/libc_errno.h"
#include "src/wchar/wctomb.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
#include "test/UnitTest/Test.h"
+using LlvmLibcWCToMBTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
TEST(LlvmLibcWCToMBTest, OneByte) {
wchar_t wc = L'U';
char mb[4];
@@ -65,4 +69,5 @@ TEST(LlvmLibcWCToMBTest, InvalidWchar) {
char mb[4];
int cnt = LIBC_NAMESPACE::wctomb(mb, wc);
ASSERT_EQ(cnt, -1);
+ ASSERT_ERRNO_EQ(EILSEQ);
}
>From a811f7ce519d5d43dcdccc9d7d955ad09e0ee71d Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 25 Jun 2025 16:12:16 +0000
Subject: [PATCH 3/3] made mbstate within wctomb static
---
libc/src/wchar/wctomb.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libc/src/wchar/wctomb.cpp b/libc/src/wchar/wctomb.cpp
index a660169ab04fc..142302e6ae09b 100644
--- a/libc/src/wchar/wctomb.cpp
+++ b/libc/src/wchar/wctomb.cpp
@@ -18,7 +18,7 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(int, wctomb, (char *s, wchar_t wc)) {
- internal::mbstate internal_mbstate;
+ static internal::mbstate internal_mbstate;
if (s == nullptr)
return 0;
More information about the libc-commits
mailing list