[libc-commits] [libc] [libc] Implement wcs to mbs family of functions (PR #149421)
Uzair Nawaz via libc-commits
libc-commits at lists.llvm.org
Fri Jul 18 12:47:16 PDT 2025
https://github.com/uzairnawaz updated https://github.com/llvm/llvm-project/pull/149421
>From 953eca5ad64f2c9387705747cfa2b788f8ac3ce7 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 16 Jul 2025 16:40:06 +0000
Subject: [PATCH 01/14] implemented public functions
---
libc/config/linux/x86_64/entrypoints.txt | 3 ++
libc/include/wchar.yaml | 28 +++++++++++
libc/src/wchar/CMakeLists.txt | 41 +++++++++++++++
libc/src/wchar/wcsnrtombs.cpp | 50 +++++++++++++++++++
libc/src/wchar/wcsnrtombs.h | 23 +++++++++
libc/src/wchar/wcsrtombs.cpp | 50 +++++++++++++++++++
libc/src/wchar/wcsrtombs.h | 23 +++++++++
libc/src/wchar/wcstombs.cpp | 45 +++++++++++++++++
libc/src/wchar/wcstombs.h | 22 +++++++++
libc/test/src/wchar/wcstombs_test.cpp | 63 ++++++++++++++++++++++++
10 files changed, 348 insertions(+)
create mode 100644 libc/src/wchar/wcsnrtombs.cpp
create mode 100644 libc/src/wchar/wcsnrtombs.h
create mode 100644 libc/src/wchar/wcsrtombs.cpp
create mode 100644 libc/src/wchar/wcsrtombs.h
create mode 100644 libc/src/wchar/wcstombs.cpp
create mode 100644 libc/src/wchar/wcstombs.h
create mode 100644 libc/test/src/wchar/wcstombs_test.cpp
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 9223911f04a93..a067e73e80d13 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1263,6 +1263,9 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.wchar.mbtowc
libc.src.wchar.wcrtomb
libc.src.wchar.wctomb
+ libc.src.wchar.wcstombs
+ libc.src.wchar.wcsrtombs
+ libc.src.wchar.wcsnrtombs
)
endif()
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 123d3440aeec3..226f1614ebcc2 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -274,3 +274,31 @@ functions:
- type: const wchar_t *__restrict
- type: wchar_t **__restrict
- type: int
+  - name: wcstombs
+    standards:
+      - stdc
+    return_type: size_t
+    arguments:
+      - type: char *__restrict
+      - type: const wchar_t *__restrict
+      - type: size_t
+  - name: wcsrtombs
+    standards:
+      - stdc
+    return_type: size_t
+    arguments:
+      - type: char *__restrict
+      - type: const wchar_t **__restrict
+      - type: size_t
+      - type: mbstate_t *__restrict # state is passed by pointer, matching wcsrtombs.h
+  - name: wcsnrtombs # NOTE(review): POSIX function, not ISO C -- confirm the standards tag below
+    standards:
+      - stdc
+    return_type: size_t
+    arguments:
+      - type: char *__restrict
+      - type: const wchar_t **__restrict # pointer-to-pointer source, matching wcsnrtombs.h
+      - type: size_t
+      - type: size_t
+      - type: mbstate_t *__restrict
+
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 7ace1a6ca66ba..521ea27fdbb26 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -159,6 +159,47 @@ add_entrypoint_object(
libc.src.__support.wchar.mbstate
)
+add_entrypoint_object(
+ wcstombs
+ SRCS
+ wcstombs.cpp
+ HDRS
+ wcstombs.h
+ DEPENDS
+ libc.hdr.types.wchar_t
+ libc.src.__support.wchar.mbstate
+ libc.src.__support.wchar.string_converter
+ libc.src.__support.libc_errno
+)
+
+add_entrypoint_object(
+ wcsrtombs
+ SRCS
+ wcsrtombs.cpp
+ HDRS
+ wcsrtombs.h
+ DEPENDS
+ libc.hdr.types.wchar_t
+ libc.hdr.types.mbstate_t
+ libc.src.__support.wchar.mbstate
+ libc.src.__support.wchar.string_converter
+ libc.src.__support.libc_errno
+)
+
+add_entrypoint_object(
+ wcstombs # NOTE(review): repeats the target name of the wcstombs entrypoint above although SRCS/HDRS are wcsnrtombs.*; PATCH 02 of this series renames it to wcsnrtombs
+ SRCS
+ wcsnrtombs.cpp
+ HDRS
+ wcsnrtombs.h
+ DEPENDS
+ libc.hdr.types.wchar_t
+ libc.hdr.types.mbstate_t
+ libc.src.__support.wchar.mbstate
+ libc.src.__support.wchar.string_converter
+ libc.src.__support.libc_errno
+)
+
add_entrypoint_object(
wmemset
SRCS
diff --git a/libc/src/wchar/wcsnrtombs.cpp b/libc/src/wchar/wcsnrtombs.cpp
new file mode 100644
index 0000000000000..3a0ea79053e33
--- /dev/null
+++ b/libc/src/wchar/wcsnrtombs.cpp
@@ -0,0 +1,50 @@
+//===-- Implementation of wcsnrtombs --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsnrtombs.h"
+
+#include "hdr/types/char32_t.h"
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/string_converter.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Converts the wide string at *pwcs into UTF-8 bytes.
+//
+//   s    - destination buffer; may be null, in which case nothing is stored
+//          and len is ignored (only the required byte count is computed).
+//   pwcs - address of the source pointer. When s is non-null it is updated:
+//          set to null once the terminating null wide character has been
+//          converted, otherwise advanced by the converter's source index.
+//   nwc  - forwarded to StringConverter as the source limit.
+//   len  - forwarded to StringConverter as the destination limit.
+//   ps   - conversion state; a function-local static state is used when null.
+//
+// Returns the number of bytes produced, not counting the terminating null
+// byte. On a conversion error, stores the error code in errno and returns
+// static_cast<size_t>(-1).
+LLVM_LIBC_FUNCTION(size_t, wcsnrtombs,
+                   (char *__restrict s, const wchar_t **__restrict pwcs,
+                    size_t nwc, size_t len, mbstate_t *ps)) {
+  static internal::mbstate internal_mbstate;
+
+  // With a null destination there is no buffer to overflow, so the byte
+  // limit does not apply.
+  if (s == nullptr)
+    len = static_cast<size_t>(-1);
+
+  // The converter must read the wide characters themselves (*pwcs), not the
+  // pointer variable that refers to them.
+  internal::StringConverter<char32_t> str_conv(
+      reinterpret_cast<const char32_t *>(*pwcs),
+      ps == nullptr ? &internal_mbstate
+                    : reinterpret_cast<internal::mbstate *>(ps),
+      len, nwc);
+
+  size_t dst_idx = 0;
+  ErrorOr<char8_t> converted = str_conv.popUTF8();
+  while (converted.has_value()) {
+    if (s != nullptr)
+      s[dst_idx] = converted.value();
+
+    // The terminating null byte is stored but not counted; reaching it ends
+    // the conversion and is reported to the caller through a null *pwcs.
+    if (converted.value() == '\0') {
+      if (s != nullptr)
+        *pwcs = nullptr;
+      return dst_idx;
+    }
+
+    ++dst_idx;
+    converted = str_conv.popUTF8();
+  }
+
+  // Publish how far the source was consumed through the caller's pointer.
+  if (s != nullptr)
+    *pwcs += str_conv.getSourceIndex();
+
+  if (converted.error() == -1) // if we hit conversion limit
+    return dst_idx;
+
+  libc_errno = converted.error();
+  return static_cast<size_t>(-1);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsnrtombs.h b/libc/src/wchar/wcsnrtombs.h
new file mode 100644
index 0000000000000..793d383660f1b
--- /dev/null
+++ b/libc/src/wchar/wcsnrtombs.h
@@ -0,0 +1,23 @@
+//===-- Implementation header for wcsnrtombs -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSNRTOMBS_H
+#define LLVM_LIBC_SRC_WCHAR_WCSNRTOMBS_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+// Declaration of the wcsnrtombs entrypoint defined in wcsnrtombs.cpp; presumably len bounds the bytes written to s and nwc the wide characters read through *pwcs -- TODO confirm against StringConverter.
+size_t wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs, size_t nwc, size_t len, mbstate_t* ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSNRTOMBS_H
diff --git a/libc/src/wchar/wcsrtombs.cpp b/libc/src/wchar/wcsrtombs.cpp
new file mode 100644
index 0000000000000..1903a7104ab64
--- /dev/null
+++ b/libc/src/wchar/wcsrtombs.cpp
@@ -0,0 +1,50 @@
+//===-- Implementation of wcsrtombs ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsrtombs.h"
+
+#include "hdr/types/char32_t.h"
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/string_converter.h"
+
+namespace LIBC_NAMESPACE_DECL {
+// wcsrtombs entrypoint: pops UTF-8 bytes from a StringConverter built over the wide input and stores them in s (when s is non-null) until popUTF8 yields an error.
+LLVM_LIBC_FUNCTION(size_t, wcsrtombs,
+ (char *__restrict s, const wchar_t **__restrict pwcs,
+ size_t n, mbstate_t *ps)) {
+ static internal::mbstate internal_mbstate;
+ internal::StringConverter<char32_t> str_conv(
+ reinterpret_cast<const char32_t *>(pwcs),
+ ps == nullptr ? &internal_mbstate
+ : reinterpret_cast<internal::mbstate *>(ps),
+ n);
+ // NOTE(review): the cast above is applied to pwcs (the wchar_t ** itself) rather than *pwcs; PATCH 03 of this series changes it to *pwcs.
+ int dst_idx = 0; // running count of bytes produced; NOTE(review): int although the function returns size_t
+ ErrorOr<char8_t> converted = str_conv.popUTF8();
+ while (converted.has_value()) {
+ if (s != nullptr)
+ s[dst_idx] = converted.value();
+ dst_idx++;
+ converted = str_conv.popUTF8();
+ }
+ // NOTE(review): the next statement advances only the local parameter, so the caller's pointer is left unchanged; PATCH 04 of this series changes it to *pwcs +=.
+ pwcs += str_conv.getSourceIndex();
+ if (converted.error() == -1) // if we hit conversion limit
+ return dst_idx;
+ // Any other error code is stored in errno; the -1 below converts to the largest size_t value through the return type.
+ libc_errno = converted.error();
+ return -1;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsrtombs.h b/libc/src/wchar/wcsrtombs.h
new file mode 100644
index 0000000000000..af69fccdb296a
--- /dev/null
+++ b/libc/src/wchar/wcsrtombs.h
@@ -0,0 +1,23 @@
+//===-- Implementation header for wcsrtombs --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSRTOMBS_H
+#define LLVM_LIBC_SRC_WCHAR_WCSRTOMBS_H
+
+#include "hdr/types/mbstate_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+// Declaration of the wcsrtombs entrypoint defined in wcsrtombs.cpp; n is forwarded to StringConverter as its limit and ps may be null (a function-local static state is used then).
+size_t wcsrtombs(char *__restrict s, const wchar_t **__restrict pwcs, size_t n, mbstate_t* ps);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSRTOMBS_H
diff --git a/libc/src/wchar/wcstombs.cpp b/libc/src/wchar/wcstombs.cpp
new file mode 100644
index 0000000000000..a2aaca8672924
--- /dev/null
+++ b/libc/src/wchar/wcstombs.cpp
@@ -0,0 +1,45 @@
+//===-- Implementation of wcstombs ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcstombs.h"
+
+#include "hdr/types/char32_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/string_converter.h"
+
+namespace LIBC_NAMESPACE_DECL {
+// wcstombs entrypoint: runs pwcs through a StringConverter (with n as its limit argument) and copies each popped UTF-8 byte into s; the store is skipped when s is null.
+LLVM_LIBC_FUNCTION(size_t, wcstombs,
+ (char *__restrict s, const wchar_t *__restrict pwcs,
+ size_t n)) {
+ static internal::mbstate internal_mbstate;
+ internal::StringConverter<char32_t> str_conv(
+ reinterpret_cast<const char32_t *>(pwcs), &internal_mbstate, n);
+ // Every popped byte is counted here, a '\0' included; PATCH 02 of this series stops counting the terminator and ignores n when s is null.
+ int dst_idx = 0;
+ ErrorOr<char8_t> converted = str_conv.popUTF8();
+ while (converted.has_value()) {
+ if (s != nullptr)
+ s[dst_idx] = converted.value();
+ dst_idx++;
+ converted = str_conv.popUTF8();
+ }
+ // An error value of -1 is treated as "limit reached", not as a failure.
+ if (converted.error() == -1) // if we hit conversion limit
+ return dst_idx;
+ // Real failure: publish the code through errno; the -1 below converts to the largest size_t value through the return type.
+ libc_errno = converted.error();
+ return -1;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcstombs.h b/libc/src/wchar/wcstombs.h
new file mode 100644
index 0000000000000..cd0008a168d90
--- /dev/null
+++ b/libc/src/wchar/wcstombs.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for wcstombs --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSTOMBS_H
+#define LLVM_LIBC_SRC_WCHAR_WCSTOMBS_H
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+// Declaration of the wcstombs entrypoint defined in wcstombs.cpp; unlike wcsrtombs it takes the source by plain pointer and has no mbstate_t parameter.
+size_t wcstombs(char *__restrict s, const wchar_t *__restrict pwcs, size_t n);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSTOMBS_H
diff --git a/libc/test/src/wchar/wcstombs_test.cpp b/libc/test/src/wchar/wcstombs_test.cpp
new file mode 100644
index 0000000000000..2c87678af9c55
--- /dev/null
+++ b/libc/test/src/wchar/wcstombs_test.cpp
@@ -0,0 +1,63 @@
+//===-- Unittests for wcstombs --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcstombs.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcWcstombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+TEST_F(LlvmLibcWcstombs, AllMultibyteLengths) {
+ /// clown emoji, sigma symbol, y with diaeresis, letter A
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+ static_cast<wchar_t>(0x0)};
+ char mbs[11];
+ // The asserts below expect 4 + 3 + 2 + 1 encoded bytes followed by the terminator, i.e. all 11 slots of mbs.
+ ASSERT_EQ(wcstombs(mbs, src, 11), static_cast<size_t>(11)); // NOTE(review): unqualified call counting the terminator; PATCH 02 switches to LIBC_NAMESPACE::wcstombs and expects 10
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+ ASSERT_EQ(mbs[5], '\x88');
+ ASSERT_EQ(mbs[6], '\x91');
+ ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin
+ ASSERT_EQ(mbs[8], '\xBF');
+ ASSERT_EQ(mbs[9], '\x41'); // A begin
+ ASSERT_EQ(mbs[10], '\0'); // null terminator
+}
+
+TEST_F(LlvmLibcWcstombs, PartialConversion) {
+ /// clown emoji, sigma symbol, y with diaeresis, letter A
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+ static_cast<wchar_t>(0x0)};
+ char mbs[11] = {0};
+ // A 6-byte limit fits the 4-byte emoji but not the following 3-byte sigma, so 4 bytes are expected.
+ ASSERT_EQ(wcstombs(mbs, src, 6), static_cast<size_t>(4));
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\0');
+ // NOTE(review): the call below repeats the same arguments (same src, same mbs) yet the asserts after it expect the rest of the string at mbs[4..10]; PATCH 02 replaces this test with DestLimit.
+ ASSERT_EQ(wcstombs(mbs, src, 6), static_cast<size_t>(4));
+
+ ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+ ASSERT_EQ(mbs[5], '\x88');
+ ASSERT_EQ(mbs[6], '\x91');
+ ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin
+ ASSERT_EQ(mbs[8], '\xBF');
+ ASSERT_EQ(mbs[9], '\x41'); // A begin
+ ASSERT_EQ(mbs[10], '\0'); // null terminator
+}
>From d4a86b2182f1b0673adab4e325c3188ee0e0ae9e Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 16 Jul 2025 20:12:16 +0000
Subject: [PATCH 02/14] wcstombs test
---
libc/src/wchar/CMakeLists.txt | 2 +-
libc/src/wchar/wcstombs.cpp | 11 ++++-
libc/test/src/wchar/CMakeLists.txt | 12 ++++++
libc/test/src/wchar/wcstombs_test.cpp | 58 ++++++++++++++++++++-------
4 files changed, 66 insertions(+), 17 deletions(-)
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 521ea27fdbb26..36969e032594f 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -187,7 +187,7 @@ add_entrypoint_object(
)
add_entrypoint_object(
- wcstombs
+ wcsnrtombs
SRCS
wcsnrtombs.cpp
HDRS
diff --git a/libc/src/wchar/wcstombs.cpp b/libc/src/wchar/wcstombs.cpp
index a2aaca8672924..94c6e9a4d6942 100644
--- a/libc/src/wchar/wcstombs.cpp
+++ b/libc/src/wchar/wcstombs.cpp
@@ -23,15 +23,22 @@ LLVM_LIBC_FUNCTION(size_t, wcstombs,
(char *__restrict s, const wchar_t *__restrict pwcs,
size_t n)) {
static internal::mbstate internal_mbstate;
+
+ if (s == nullptr)
+ n = SIZE_MAX;
+
internal::StringConverter<char32_t> str_conv(
reinterpret_cast<const char32_t *>(pwcs), &internal_mbstate, n);
int dst_idx = 0;
ErrorOr<char8_t> converted = str_conv.popUTF8();
while (converted.has_value()) {
- if (s != nullptr)
+ if (s != nullptr)
s[dst_idx] = converted.value();
- dst_idx++;
+
+ if (converted.value() != '\0')
+ dst_idx++;
+
converted = str_conv.popUTF8();
}
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 176cf7c3487cd..1d2e45c33f84a 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -101,6 +101,18 @@ add_libc_test(
libc.hdr.types.wchar_t
)
+add_libc_test(
+ wcstombs_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wcstombs_test.cpp
+ DEPENDS
+ libc.src.wchar.wcstombs
+ libc.test.UnitTest.ErrnoCheckingTest
+ libc.hdr.types.wchar_t
+)
+
add_libc_test(
wmemset_test
SUITE
diff --git a/libc/test/src/wchar/wcstombs_test.cpp b/libc/test/src/wchar/wcstombs_test.cpp
index 2c87678af9c55..25ff4baa2e8cd 100644
--- a/libc/test/src/wchar/wcstombs_test.cpp
+++ b/libc/test/src/wchar/wcstombs_test.cpp
@@ -20,7 +20,7 @@ TEST_F(LlvmLibcWcstombs, AllMultibyteLengths) {
static_cast<wchar_t>(0x0)};
char mbs[11];
- ASSERT_EQ(wcstombs(mbs, src, 11), static_cast<size_t>(11));
+ ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 11), static_cast<size_t>(10));
ASSERT_ERRNO_SUCCESS();
ASSERT_EQ(mbs[0], '\xF0'); // clown begin
ASSERT_EQ(mbs[1], '\x9F');
@@ -35,29 +35,59 @@ TEST_F(LlvmLibcWcstombs, AllMultibyteLengths) {
ASSERT_EQ(mbs[10], '\0'); // null terminator
}
-TEST_F(LlvmLibcWcstombs, PartialConversion) {
+TEST_F(LlvmLibcWcstombs, DestLimit) {
/// clown emoji, sigma symbol, y with diaeresis, letter A
const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
static_cast<wchar_t>(0x2211),
static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
static_cast<wchar_t>(0x0)};
- char mbs[11] = {0};
+ char mbs[11];
+ for (int i = 0; i < 11; ++i)
+ mbs[i] = '\x01'; // dummy initial values
- ASSERT_EQ(wcstombs(mbs, src, 6), static_cast<size_t>(4));
+ ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 4), static_cast<size_t>(4));
ASSERT_ERRNO_SUCCESS();
- ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+ ASSERT_EQ(mbs[0], '\xF0');
ASSERT_EQ(mbs[1], '\x9F');
ASSERT_EQ(mbs[2], '\xA4');
ASSERT_EQ(mbs[3], '\xA1');
- ASSERT_EQ(mbs[4], '\0');
+ ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes
- ASSERT_EQ(wcstombs(mbs, src, 6), static_cast<size_t>(4));
+ for (int i = 0; i < 11; ++i)
+ mbs[i] = '\x01'; // dummy initial values
- ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
- ASSERT_EQ(mbs[5], '\x88');
- ASSERT_EQ(mbs[6], '\x91');
- ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin
- ASSERT_EQ(mbs[8], '\xBF');
- ASSERT_EQ(mbs[9], '\x41'); // A begin
- ASSERT_EQ(mbs[10], '\0'); // null terminator
+ // not enough bytes to convert the second character, so only converts one
+ ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 6), static_cast<size_t>(4));
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_EQ(mbs[0], '\xF0');
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\x01');
+}
+
+TEST_F(LlvmLibcWcstombs, NullDest) {
+  // Four wide characters whose UTF-8 encodings take 4, 3, 2 and 1 bytes,
+  // followed by the terminating null wide character.
+  const wchar_t src[] = {
+      static_cast<wchar_t>(0x1f921), static_cast<wchar_t>(0x2211),
+      static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+      static_cast<wchar_t>(0x0)};
+  const size_t full_length = static_cast<size_t>(10);
+
+  // A null destination makes the call a pure length query, so a limit that
+  // is too small must give the same answer as ...
+  const size_t with_small_limit = LIBC_NAMESPACE::wcstombs(nullptr, src, 1);
+  ASSERT_EQ(with_small_limit, full_length);
+  ASSERT_ERRNO_SUCCESS();
+
+  // ... a limit that is more than large enough.
+  const size_t with_large_limit = LIBC_NAMESPACE::wcstombs(nullptr, src, 100);
+  ASSERT_EQ(with_large_limit, full_length);
+  ASSERT_ERRNO_SUCCESS();
+}
+
+TEST_F(LlvmLibcWcstombs, ErrnoTest) {
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0x12ffff), // invalid widechar
+                         static_cast<wchar_t>(0x0)};
+  // A real destination is required here: with a null destination the limit
+  // is ignored, so the conversion could never stop short of the invalid
+  // wide character.
+  char mbs[11];
+
+  // Exactly enough room for the two valid characters (4 + 3 bytes), so the
+  // conversion stops before the invalid one and reports no error.
+  ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 7), static_cast<size_t>(7));
+  ASSERT_ERRNO_SUCCESS();
+
+  // With room to spare the invalid wide character is reached and rejected.
+  ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, sizeof(mbs)),
+            static_cast<size_t>(-1));
+  ASSERT_ERRNO_EQ(EILSEQ);
+}
\ No newline at end of file
>From 586497df73a43645c00e0173a283931446d5ad08 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Thu, 17 Jul 2025 17:22:51 +0000
Subject: [PATCH 03/14] wcsrtombs tests
---
libc/src/wchar/wcsrtombs.cpp | 2 +-
libc/test/src/wchar/CMakeLists.txt | 14 +++
libc/test/src/wchar/wcsrtombs_test.cpp | 129 +++++++++++++++++++++++++
3 files changed, 144 insertions(+), 1 deletion(-)
create mode 100644 libc/test/src/wchar/wcsrtombs_test.cpp
diff --git a/libc/src/wchar/wcsrtombs.cpp b/libc/src/wchar/wcsrtombs.cpp
index 1903a7104ab64..af6accd848213 100644
--- a/libc/src/wchar/wcsrtombs.cpp
+++ b/libc/src/wchar/wcsrtombs.cpp
@@ -25,7 +25,7 @@ LLVM_LIBC_FUNCTION(size_t, wcsrtombs,
size_t n, mbstate_t *ps)) {
static internal::mbstate internal_mbstate;
internal::StringConverter<char32_t> str_conv(
- reinterpret_cast<const char32_t *>(pwcs),
+ reinterpret_cast<const char32_t *>(*pwcs),
ps == nullptr ? &internal_mbstate
: reinterpret_cast<internal::mbstate *>(ps),
n);
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 1d2e45c33f84a..ac97b43e4d876 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -113,6 +113,20 @@ add_libc_test(
libc.hdr.types.wchar_t
)
+add_libc_test(
+ wcsrtombs_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wcsrtombs_test.cpp
+ DEPENDS
+ libc.src.wchar.wcsrtombs
+ libc.test.UnitTest.ErrnoCheckingTest
+ libc.hdr.types.wchar_t
+ libc.src.string.memset
+ libc.hdr.types.mbstate_t
+)
+
add_libc_test(
wmemset_test
SUITE
diff --git a/libc/test/src/wchar/wcsrtombs_test.cpp b/libc/test/src/wchar/wcsrtombs_test.cpp
new file mode 100644
index 0000000000000..84d0bc3b40980
--- /dev/null
+++ b/libc/test/src/wchar/wcsrtombs_test.cpp
@@ -0,0 +1,129 @@
+//===-- Unittests for wcsrtombs -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/mbstate_t.h"
+#include "src/string/memset.h"
+#include "src/wchar/wcsrtombs.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcWcsrtombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+TEST_F(LlvmLibcWcsrtombs, AllMultibyteLengths) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  // One wide character per UTF-8 encoding length: clown emoji (4 bytes),
+  // sigma symbol (3), y with diaeresis (2), letter A (1), then the terminator.
+  const wchar_t src[] = {
+      static_cast<wchar_t>(0x1f921), static_cast<wchar_t>(0x2211),
+      static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+      static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+  char mbs[11];
+
+  // The whole string fits, so the count excludes only the terminator and the
+  // source pointer is expected to be nulled out.
+  const size_t written = LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 11, &state);
+  ASSERT_EQ(written, static_cast<size_t>(10));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(cur, nullptr);
+
+  const char expected[11] = {
+      '\xF0', '\x9F', '\xA4', '\xA1', // clown
+      '\xE2', '\x88', '\x91',         // sigma
+      '\xC3', '\xBF',                 // y diaeresis
+      '\x41',                         // A
+      '\0'};                          // null terminator
+  for (int i = 0; i < 11; ++i)
+    ASSERT_EQ(mbs[i], expected[i]);
+}
+
+TEST_F(LlvmLibcWcsrtombs, DestLimit) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  /// clown emoji, sigma symbol, y with diaeresis, letter A
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+
+  char mbs[11];
+  for (int i = 0; i < 11; ++i)
+    mbs[i] = '\x01'; // dummy initial values
+
+  // Room for exactly the 4-byte emoji.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 4, &state),
+            static_cast<size_t>(4));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(cur, src + 1);
+  ASSERT_EQ(mbs[0], '\xF0');
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes
+
+  // Start the second scenario from scratch. The previous call advanced cur
+  // to src + 1, and the expectations below (emoji bytes at mbs[0..3], cur
+  // ending at src + 1) only hold when conversion restarts at src.
+  for (int i = 0; i < 11; ++i)
+    mbs[i] = '\x01'; // dummy initial values
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+  cur = src;
+
+  // not enough bytes to convert the second character, so only converts one
+  ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 6, &state),
+            static_cast<size_t>(4));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(cur, src + 1);
+  ASSERT_EQ(mbs[0], '\xF0');
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\x01');
+}
+
+TEST_F(LlvmLibcWcsrtombs, NullDest) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+
+  // n parameter ignored when dest is null
+  ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(nullptr, &cur, 1, &state),
+            static_cast<size_t>(10));
+  ASSERT_ERRNO_SUCCESS();
+  // NOTE(review): ISO C describes updating *src only for a non-null
+  // destination -- confirm that nulling cur here is the intended behavior.
+  ASSERT_EQ(cur, nullptr);
+
+  // Restart from the beginning of the string: cur was just asserted to be
+  // null, so reusing it would hand the converter a null source pointer.
+  cur = src;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+  ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(nullptr, &cur, 100, &state),
+            static_cast<size_t>(10));
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(cur, nullptr);
+}
+
+TEST_F(LlvmLibcWcsrtombs, ErrnoTest) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0x12ffff), // invalid widechar
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+  // A real destination is required: the NullDest test expects the limit to
+  // be ignored for a null destination, so only a non-null one can make the
+  // conversion stop before the invalid wide character.
+  char mbs[11];
+
+  // Exactly enough room for the two valid characters (4 + 3 bytes).
+  ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 7, &state),
+            static_cast<size_t>(7));
+  ASSERT_ERRNO_SUCCESS();
+
+  // Continuing with room to spare runs into the invalid wide character.
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+  ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, sizeof(mbs), &state),
+            static_cast<size_t>(-1));
+  ASSERT_ERRNO_EQ(EILSEQ);
+}
\ No newline at end of file
>From 7445bf79f948b83f855e5323077a260ae2fe1eb9 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Thu, 17 Jul 2025 19:52:30 +0000
Subject: [PATCH 04/14] created internal function
---
libc/src/__support/wchar/CMakeLists.txt | 19 +++
libc/src/__support/wchar/wcsnrtombs.cpp | 56 ++++++++
libc/src/__support/wchar/wcsnrtombs.h | 27 ++++
libc/src/wchar/wcsrtombs.cpp | 2 +-
libc/test/src/__support/wchar/CMakeLists.txt | 17 +++
.../src/__support/wchar/wcsnrtombs_test.cpp | 128 ++++++++++++++++++
6 files changed, 248 insertions(+), 1 deletion(-)
create mode 100644 libc/src/__support/wchar/wcsnrtombs.cpp
create mode 100644 libc/src/__support/wchar/wcsnrtombs.h
create mode 100644 libc/test/src/__support/wchar/wcsnrtombs_test.cpp
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index 802441d37fe92..b9efe5888d955 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -68,3 +68,22 @@ add_object_library(
.character_converter
.mbstate
)
+
+add_object_library(
+ wcsnrtombs
+ HDRS
+ wcsnrtombs.h
+ SRCS
+ wcsnrtombs.cpp
+ DEPENDS
+ libc.hdr.errno_macros
+ libc.hdr.types.char8_t
+ libc.hdr.types.char32_t
+ libc.hdr.types.size_t
+ libc.hdr.types.wchar_t
+ libc.src.__support.error_or
+ libc.src.__support.common
+ .string_converter
+ .character_converter
+ .mbstate
+)
diff --git a/libc/src/__support/wchar/wcsnrtombs.cpp b/libc/src/__support/wchar/wcsnrtombs.cpp
new file mode 100644
index 0000000000000..b7add66214b5a
--- /dev/null
+++ b/libc/src/__support/wchar/wcsnrtombs.cpp
@@ -0,0 +1,56 @@
+//===-- Implementation of wcsnrtombs --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/wchar/wcsnrtombs.h"
+
+#include "hdr/types/char32_t.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/string_converter.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Shared implementation behind the wide-string to multibyte-string
+// entrypoints.
+//
+// Pops UTF-8 bytes from a StringConverter constructed over *pwcs with len
+// and nwc as its limits, storing them in s when s is non-null (len is
+// replaced by SIZE_MAX when s is null).
+//
+// Returns the number of bytes produced, not counting a terminating '\0'.
+// When the '\0' is produced, *pwcs is set to null; otherwise *pwcs is
+// advanced by the converter's source index. Returns Error(EINVAL) if
+// CharacterConverter reports ps as invalid, or the converter's own error
+// code for any error other than -1.
+//
+// NOTE(review): *pwcs is written even when s is null; ISO C describes that
+// update only for a non-null destination -- confirm this is intended.
+ErrorOr<size_t> wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs,
+                           size_t nwc, size_t len, mbstate *ps) {
+  CharacterConverter state_check(ps);
+  if (!state_check.isValidState())
+    return Error(EINVAL);
+
+  const bool has_dest = s != nullptr;
+  if (!has_dest)
+    len = SIZE_MAX;
+
+  StringConverter<char32_t> converter(
+      reinterpret_cast<const char32_t *>(*pwcs), ps, len, nwc);
+
+  size_t written = 0;
+  ErrorOr<char8_t> next = converter.popUTF8();
+  for (; next.has_value(); next = converter.popUTF8()) {
+    if (has_dest)
+      s[written] = next.value();
+
+    // The terminator is stored but not counted, and ends the conversion.
+    if (next.value() == '\0') {
+      *pwcs = nullptr;
+      return written;
+    }
+
+    ++written;
+  }
+
+  // The loop ended on an error value: record how far the source was read.
+  *pwcs += converter.getSourceIndex();
+
+  // -1 means a limit was reached, which is a normal way to stop.
+  if (next.error() != -1)
+    return Error(next.error());
+  return written;
+}
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h
new file mode 100644
index 0000000000000..972fbf97e52f8
--- /dev/null
+++ b/libc/src/__support/wchar/wcsnrtombs.h
@@ -0,0 +1,27 @@
+//===-- Implementation header for wcsnrtombs ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H
+#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/error_or.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+// Internal wide-string to multibyte-string conversion (defined in wcsnrtombs.cpp): returns the byte count on success or an error code wrapped in ErrorOr.
+ErrorOr<size_t> wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs,
+                           size_t nwc, size_t len, mbstate *ps);
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H
diff --git a/libc/src/wchar/wcsrtombs.cpp b/libc/src/wchar/wcsrtombs.cpp
index af6accd848213..cdbb6cb070ec5 100644
--- a/libc/src/wchar/wcsrtombs.cpp
+++ b/libc/src/wchar/wcsrtombs.cpp
@@ -39,7 +39,7 @@ LLVM_LIBC_FUNCTION(size_t, wcsrtombs,
converted = str_conv.popUTF8();
}
- pwcs += str_conv.getSourceIndex();
+ *pwcs += str_conv.getSourceIndex();
if (converted.error() == -1) // if we hit conversion limit
return dst_idx;
diff --git a/libc/test/src/__support/wchar/CMakeLists.txt b/libc/test/src/__support/wchar/CMakeLists.txt
index f0727451736f9..c112c83dbe9af 100644
--- a/libc/test/src/__support/wchar/CMakeLists.txt
+++ b/libc/test/src/__support/wchar/CMakeLists.txt
@@ -34,3 +34,20 @@ add_libc_test(
libc.hdr.errno_macros
libc.hdr.types.char32_t
)
+
+add_libc_test(
+ wcsnrtombs_test
+ SUITE
+ libc-support-tests
+ SRCS
+ wcsnrtombs_test.cpp
+ DEPENDS
+ libc.src.__support.wchar.string_converter
+ libc.src.__support.wchar.character_converter
+ libc.src.__support.wchar.mbstate
+ libc.src.__support.error_or
+ libc.src.__support.wchar.wcsnrtombs
+ libc.hdr.errno_macros
+ libc.hdr.types.char32_t
+ libc.hdr.types.char8_t
+)
diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
new file mode 100644
index 0000000000000..0838b8d3ffa32
--- /dev/null
+++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
@@ -0,0 +1,128 @@
+//===-- Unittests for wcsnrtombs ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/errno_macros.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/error_or.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcsnrtombs.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcWcsnrtombs, AllMultibyteLengths) {
+ LIBC_NAMESPACE::internal::mbstate state;
+
+ /// clown emoji, sigma symbol, y with diaeresis, letter A
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+ static_cast<wchar_t>(0x0)};
+ const wchar_t *cur = src;
+ char mbs[11];
+
+ auto res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 5, 11, &state);
+ ASSERT_TRUE(res.has_value());
+ ASSERT_EQ(res.value(), static_cast<size_t>(10));
+ ASSERT_EQ(cur, nullptr);
+ ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+ ASSERT_EQ(mbs[5], '\x88');
+ ASSERT_EQ(mbs[6], '\x91');
+ ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin
+ ASSERT_EQ(mbs[8], '\xBF');
+ ASSERT_EQ(mbs[9], '\x41'); // A begin
+ ASSERT_EQ(mbs[10], '\0'); // null terminator
+}
+
+TEST(LlvmLibcWcsnrtombs, DestLimit) {
+ LIBC_NAMESPACE::internal::mbstate state1;
+
+ /// clown emoji, sigma symbol, y with diaeresis, letter A
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+ static_cast<wchar_t>(0x0)};
+ const wchar_t *cur = src;
+
+ char mbs[11];
+ for (int i = 0; i < 11; ++i)
+ mbs[i] = '\x01'; // dummy initial values
+
+ auto res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 5, 4, &state1);
+ ASSERT_TRUE(res.has_value());
+ ASSERT_EQ(res.value(), static_cast<size_t>(4));
+ ASSERT_EQ(cur, src + 1);
+ ASSERT_EQ(mbs[0], '\xF0');
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes
+
+ for (int i = 0; i < 11; ++i)
+ mbs[i] = '\x01'; // dummy initial values
+ LIBC_NAMESPACE::internal::mbstate state2;
+
+ // not enough bytes to convert the second character, so only converts one
+ cur = src;
+ res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 5, 6, &state2);
+ ASSERT_TRUE(res.has_value());
+ ASSERT_EQ(res.value(), static_cast<size_t>(4));
+ ASSERT_EQ(cur, src + 1);
+ ASSERT_EQ(mbs[0], '\xF0');
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\x01');
+}
+
+TEST(LlvmLibcWcsnrtombs, NullDest) {
+ LIBC_NAMESPACE::internal::mbstate state1;
+
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+ static_cast<wchar_t>(0x0)};
+ const wchar_t *cur = src;
+
+ // n parameter ignored when dest is null
+ auto res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 1, &state1);
+ ASSERT_TRUE(res.has_value());
+ ASSERT_EQ(res.value(), static_cast<size_t>(10));
+ ASSERT_EQ(cur, nullptr);
+
+ LIBC_NAMESPACE::internal::mbstate state2;
+ cur = src;
+ res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 100, &state2);
+ ASSERT_TRUE(res.has_value());
+ ASSERT_EQ(res.value(), static_cast<size_t>(10));
+ ASSERT_EQ(cur, nullptr);
+}
+
+TEST(LlvmLibcWcsnrtombs, ErrorTest) {
+ LIBC_NAMESPACE::internal::mbstate state1;
+
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0x12ffff), // invalid widechar
+ static_cast<wchar_t>(0x0)};
+ const wchar_t *cur = src;
+
+ // n parameter ignored when dest is null
+ auto res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 7, &state1);
+ ASSERT_TRUE(res.has_value());
+ ASSERT_EQ(res.value(), static_cast<size_t>(7));
+
+ LIBC_NAMESPACE::internal::mbstate state2;
+ cur = src;
+ res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 100, &state2);
+ ASSERT_FALSE(res.has_value());
+ ASSERT_EQ(res.error(), EILSEQ);
+}
>From e2be69afa8caf47216739dccbd278faef0d35b0e Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Thu, 17 Jul 2025 19:55:39 +0000
Subject: [PATCH 05/14] Merge branch 'main' into wcstombs-functions
>From 843c79a2c1acf9abc55a41f7997beb7bbd90818b Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Thu, 17 Jul 2025 20:12:07 +0000
Subject: [PATCH 06/14] add invalid state test
---
.../src/__support/wchar/wcsnrtombs_test.cpp | 67 ++++++++++++++++++-
1 file changed, 64 insertions(+), 3 deletions(-)
diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
index 0838b8d3ffa32..63d83d7d782f1 100644
--- a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
+++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
@@ -83,6 +83,50 @@ TEST(LlvmLibcWcsnrtombs, DestLimit) {
ASSERT_EQ(mbs[4], '\x01');
}
+TEST(LlvmLibcWcsnrtombs, SrcLimit) {
+ LIBC_NAMESPACE::internal::mbstate state;
+
+ /// clown emoji, sigma symbol, y with diaeresis, letter A
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+ static_cast<wchar_t>(0x0)};
+ const wchar_t *cur = src;
+
+ char mbs[11];
+ for (int i = 0; i < 11; ++i)
+ mbs[i] = '\x01'; // dummy initial values
+
+ auto res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 2, 11, &state);
+ ASSERT_TRUE(res.has_value());
+ ASSERT_EQ(res.value(), static_cast<size_t>(7));
+ ASSERT_EQ(cur, src + 2);
+ ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+ ASSERT_EQ(mbs[5], '\x88');
+ ASSERT_EQ(mbs[6], '\x91');
+ ASSERT_EQ(mbs[7], '\x01');
+
+ res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs + res.value(), &cur, 100, 11, &state);
+ ASSERT_TRUE(res.has_value());
+ ASSERT_EQ(res.value(), static_cast<size_t>(3));
+ ASSERT_EQ(cur, nullptr);
+ ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+ ASSERT_EQ(mbs[5], '\x88');
+ ASSERT_EQ(mbs[6], '\x91');
+ ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin
+ ASSERT_EQ(mbs[8], '\xBF');
+ ASSERT_EQ(mbs[9], '\x41'); // A begin
+ ASSERT_EQ(mbs[10], '\0'); // null terminator
+}
+
TEST(LlvmLibcWcsnrtombs, NullDest) {
LIBC_NAMESPACE::internal::mbstate state1;
@@ -106,7 +150,23 @@ TEST(LlvmLibcWcsnrtombs, NullDest) {
ASSERT_EQ(cur, nullptr);
}
-TEST(LlvmLibcWcsnrtombs, ErrorTest) {
+TEST(LlvmLibcWcsnrtombs, InvalidState) {
+ LIBC_NAMESPACE::internal::mbstate state;
+ state.total_bytes = 100;
+
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+ static_cast<wchar_t>(0x0)};
+ const wchar_t *cur = src;
+
+ // n parameter ignored when dest is null
+ auto res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 1, &state);
+ ASSERT_FALSE(res.has_value());
+ ASSERT_EQ(res.error(), EINVAL);
+}
+
+TEST(LlvmLibcWcsnrtombs, InvalidCharacter) {
LIBC_NAMESPACE::internal::mbstate state1;
const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
@@ -114,15 +174,16 @@ TEST(LlvmLibcWcsnrtombs, ErrorTest) {
static_cast<wchar_t>(0x12ffff), // invalid widechar
static_cast<wchar_t>(0x0)};
const wchar_t *cur = src;
+ char mbs[11];
// n parameter ignored when dest is null
- auto res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 7, &state1);
+ auto res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 5, 7, &state1);
ASSERT_TRUE(res.has_value());
ASSERT_EQ(res.value(), static_cast<size_t>(7));
LIBC_NAMESPACE::internal::mbstate state2;
cur = src;
- res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 100, &state2);
+ res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs, &cur, 5, 11, &state2);
ASSERT_FALSE(res.has_value());
ASSERT_EQ(res.error(), EILSEQ);
}
>From 69ed44c62abe09f12dcd9feb6dad40bd00fe7bfd Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Thu, 17 Jul 2025 20:58:29 +0000
Subject: [PATCH 07/14] public string functions + tests
---
libc/src/wchar/CMakeLists.txt | 6 +-
libc/src/wchar/wcsnrtombs.cpp | 29 +---
libc/src/wchar/wcsrtombs.cpp | 27 +--
libc/src/wchar/wcstombs.cpp | 33 +---
.../src/__support/wchar/wcsnrtombs_test.cpp | 1 +
libc/test/src/wchar/CMakeLists.txt | 14 ++
libc/test/src/wchar/wcsnrtombs_test.cpp | 156 ++++++++++++++++++
libc/test/src/wchar/wcsrtombs_test.cpp | 36 ++--
libc/test/src/wchar/wcstombs_test.cpp | 24 +--
9 files changed, 218 insertions(+), 108 deletions(-)
create mode 100644 libc/test/src/wchar/wcsnrtombs_test.cpp
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 36969e032594f..05c91d459bb95 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -168,7 +168,7 @@ add_entrypoint_object(
DEPENDS
libc.hdr.types.wchar_t
libc.src.__support.wchar.mbstate
- libc.src.__support.wchar.string_converter
+ libc.src.__support.wchar.wcsnrtombs
libc.src.__support.libc_errno
)
@@ -182,7 +182,7 @@ add_entrypoint_object(
libc.hdr.types.wchar_t
libc.hdr.types.mbstate_t
libc.src.__support.wchar.mbstate
- libc.src.__support.wchar.string_converter
+ libc.src.__support.wchar.wcsnrtombs
libc.src.__support.libc_errno
)
@@ -196,7 +196,7 @@ add_entrypoint_object(
libc.hdr.types.wchar_t
libc.hdr.types.mbstate_t
libc.src.__support.wchar.mbstate
- libc.src.__support.wchar.string_converter
+ libc.src.__support.wchar.wcsnrtombs
libc.src.__support.libc_errno
)
diff --git a/libc/src/wchar/wcsnrtombs.cpp b/libc/src/wchar/wcsnrtombs.cpp
index 3a0ea79053e33..fd4724150e927 100644
--- a/libc/src/wchar/wcsnrtombs.cpp
+++ b/libc/src/wchar/wcsnrtombs.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "src/wchar/wcsrtombs.h"
+#include "src/wchar/wcsnrtombs.h"
#include "hdr/types/char32_t.h"
#include "hdr/types/mbstate_t.h"
@@ -16,7 +16,7 @@
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbstate.h"
-#include "src/__support/wchar/string_converter.h"
+#include "src/__support/wchar/wcsnrtombs.h"
namespace LIBC_NAMESPACE_DECL {
@@ -24,27 +24,16 @@ LLVM_LIBC_FUNCTION(size_t, wcsnrtombs,
(char *__restrict s, const wchar_t **__restrict pwcs,
size_t nwc, size_t len, mbstate_t *ps)) {
static internal::mbstate internal_mbstate;
- internal::StringConverter<char32_t> str_conv(
- reinterpret_cast<const char32_t *>(pwcs),
+ auto result = internal::wcsnrtombs(
+ s, pwcs, nwc, len,
ps == nullptr ? &internal_mbstate
- : reinterpret_cast<internal::mbstate *>(ps),
- len, nwc);
-
- int dst_idx = 0;
- ErrorOr<char8_t> converted = str_conv.popUTF8();
- while (converted.has_value()) {
- if (s != nullptr)
- s[dst_idx] = converted.value();
- dst_idx++;
- converted = str_conv.popUTF8();
+ : reinterpret_cast<internal::mbstate *>(ps));
+ if (!result.has_value()) {
+ libc_errno = result.error();
+ return -1;
}
- pwcs += str_conv.getSourceIndex();
- if (converted.error() == -1) // if we hit conversion limit
- return dst_idx;
-
- libc_errno = converted.error();
- return -1;
+ return result.value();
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsrtombs.cpp b/libc/src/wchar/wcsrtombs.cpp
index cdbb6cb070ec5..b4632a4a436ba 100644
--- a/libc/src/wchar/wcsrtombs.cpp
+++ b/libc/src/wchar/wcsrtombs.cpp
@@ -16,7 +16,7 @@
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbstate.h"
-#include "src/__support/wchar/string_converter.h"
+#include "src/__support/wchar/wcsnrtombs.h"
namespace LIBC_NAMESPACE_DECL {
@@ -24,27 +24,16 @@ LLVM_LIBC_FUNCTION(size_t, wcsrtombs,
(char *__restrict s, const wchar_t **__restrict pwcs,
size_t n, mbstate_t *ps)) {
static internal::mbstate internal_mbstate;
- internal::StringConverter<char32_t> str_conv(
- reinterpret_cast<const char32_t *>(*pwcs),
+ auto result = internal::wcsnrtombs(
+ s, pwcs, SIZE_MAX, n,
ps == nullptr ? &internal_mbstate
- : reinterpret_cast<internal::mbstate *>(ps),
- n);
-
- int dst_idx = 0;
- ErrorOr<char8_t> converted = str_conv.popUTF8();
- while (converted.has_value()) {
- if (s != nullptr)
- s[dst_idx] = converted.value();
- dst_idx++;
- converted = str_conv.popUTF8();
+ : reinterpret_cast<internal::mbstate *>(ps));
+ if (!result.has_value()) {
+ libc_errno = result.error();
+ return -1;
}
-
- *pwcs += str_conv.getSourceIndex();
- if (converted.error() == -1) // if we hit conversion limit
- return dst_idx;
- libc_errno = converted.error();
- return -1;
+ return result.value();
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcstombs.cpp b/libc/src/wchar/wcstombs.cpp
index 94c6e9a4d6942..28e2425d645e7 100644
--- a/libc/src/wchar/wcstombs.cpp
+++ b/libc/src/wchar/wcstombs.cpp
@@ -15,38 +15,23 @@
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbstate.h"
-#include "src/__support/wchar/string_converter.h"
+#include "src/__support/wchar/wcsnrtombs.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(size_t, wcstombs,
- (char *__restrict s, const wchar_t *__restrict pwcs,
+ (char *__restrict s, const wchar_t *__restrict wcs,
size_t n)) {
static internal::mbstate internal_mbstate;
-
- if (s == nullptr)
- n = SIZE_MAX;
-
- internal::StringConverter<char32_t> str_conv(
- reinterpret_cast<const char32_t *>(pwcs), &internal_mbstate, n);
-
- int dst_idx = 0;
- ErrorOr<char8_t> converted = str_conv.popUTF8();
- while (converted.has_value()) {
- if (s != nullptr)
- s[dst_idx] = converted.value();
-
- if (converted.value() != '\0')
- dst_idx++;
-
- converted = str_conv.popUTF8();
+ const wchar_t *wcs_ptr_copy = wcs;
+ auto result =
+ internal::wcsnrtombs(s, &wcs_ptr_copy, SIZE_MAX, n, &internal_mbstate);
+ if (!result.has_value()) {
+ libc_errno = result.error();
+ return -1;
}
- if (converted.error() == -1) // if we hit conversion limit
- return dst_idx;
-
- libc_errno = converted.error();
- return -1;
+ return result.value();
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
index 63d83d7d782f1..710fc4b568ac0 100644
--- a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
+++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
@@ -151,6 +151,7 @@ TEST(LlvmLibcWcsnrtombs, NullDest) {
}
TEST(LlvmLibcWcsnrtombs, InvalidState) {
+ // this is more thoroughly tested by CharacterConverter
LIBC_NAMESPACE::internal::mbstate state;
state.total_bytes = 100;
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index ac97b43e4d876..553821ddaf9af 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -127,6 +127,20 @@ add_libc_test(
libc.hdr.types.mbstate_t
)
+add_libc_test(
+ wcsnrtombs_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wcsnrtombs_test.cpp
+ DEPENDS
+ libc.src.wchar.wcsnrtombs
+ libc.test.UnitTest.ErrnoCheckingTest
+ libc.hdr.types.wchar_t
+ libc.src.string.memset
+ libc.hdr.types.mbstate_t
+)
+
add_libc_test(
wmemset_test
SUITE
diff --git a/libc/test/src/wchar/wcsnrtombs_test.cpp b/libc/test/src/wchar/wcsnrtombs_test.cpp
new file mode 100644
index 0000000000000..f6a333964018d
--- /dev/null
+++ b/libc/test/src/wchar/wcsnrtombs_test.cpp
@@ -0,0 +1,156 @@
+//===-- Unittests for wcsnrtombs ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/mbstate_t.h"
+#include "src/string/memset.h"
+#include "src/wchar/wcsnrtombs.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcWcsnrtombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+// these tests are fairly simple as this function just calls into the internal
+// wcsnrtombs which is more thoroughly tested
+
+TEST_F(LlvmLibcWcsnrtombs, AllMultibyteLengths) {
+ mbstate_t state;
+ LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+ /// clown emoji, sigma symbol, y with diaeresis, letter A
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+ static_cast<wchar_t>(0x0)};
+ const wchar_t *cur = src;
+ char mbs[11];
+
+ ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 11, &state),
+ static_cast<size_t>(10));
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_EQ(cur, nullptr);
+ ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+ ASSERT_EQ(mbs[5], '\x88');
+ ASSERT_EQ(mbs[6], '\x91');
+ ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin
+ ASSERT_EQ(mbs[8], '\xBF');
+ ASSERT_EQ(mbs[9], '\x41'); // A begin
+ ASSERT_EQ(mbs[10], '\0'); // null terminator
+}
+
+TEST_F(LlvmLibcWcsnrtombs, DestLimit) {
+ mbstate_t state;
+ LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+ /// clown emoji, sigma symbol, y with diaeresis, letter A
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+ static_cast<wchar_t>(0x0)};
+ const wchar_t *cur = src;
+
+ char mbs[11];
+ for (int i = 0; i < 11; ++i)
+ mbs[i] = '\x01'; // dummy initial values
+
+ ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 4, &state),
+ static_cast<size_t>(4));
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_EQ(cur, src + 1);
+ ASSERT_EQ(mbs[0], '\xF0');
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes
+
+ for (int i = 0; i < 11; ++i)
+ mbs[i] = '\x01'; // dummy initial values
+ LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+ cur = src;
+
+ // not enough bytes to convert the second character, so only converts one
+ ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 6, &state),
+ static_cast<size_t>(4));
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_EQ(cur, src + 1);
+ ASSERT_EQ(mbs[0], '\xF0');
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\x01');
+}
+
+TEST_F(LlvmLibcWcsnrtombs, SrcLimit) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+  // checks that the nwc source limit is honoured and conversion can resume
+  /// clown emoji, sigma symbol, y with diaeresis, letter A
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+
+  char mbs[11];
+  for (int i = 0; i < 11; ++i)
+    mbs[i] = '\x01'; // dummy initial values
+  // nwc = 2: only the first two wide characters (4 + 3 bytes) are converted
+  auto res = LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 2, 11, &state);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(res, static_cast<size_t>(7));
+  ASSERT_EQ(cur, src + 2);
+  ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+  ASSERT_EQ(mbs[5], '\x88');
+  ASSERT_EQ(mbs[6], '\x91');
+  ASSERT_EQ(mbs[7], '\x01'); // nothing written past the 7 converted bytes
+  // resume at cur, appending after the bytes already written
+  res = LIBC_NAMESPACE::wcsnrtombs(mbs + res, &cur, 100, 11, &state);
+  ASSERT_ERRNO_SUCCESS();
+  ASSERT_EQ(res, static_cast<size_t>(3));
+  ASSERT_EQ(cur, nullptr);
+  ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+  ASSERT_EQ(mbs[1], '\x9F');
+  ASSERT_EQ(mbs[2], '\xA4');
+  ASSERT_EQ(mbs[3], '\xA1');
+  ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+  ASSERT_EQ(mbs[5], '\x88');
+  ASSERT_EQ(mbs[6], '\x91');
+  ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin
+  ASSERT_EQ(mbs[8], '\xBF');
+  ASSERT_EQ(mbs[9], '\x41'); // A begin
+  ASSERT_EQ(mbs[10], '\0');  // null terminator
+}
+
+TEST_F(LlvmLibcWcsnrtombs, ErrnoTest) {
+  mbstate_t state;
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+                         static_cast<wchar_t>(0x2211),
+                         static_cast<wchar_t>(0x12ffff), // invalid widechar
+                         static_cast<wchar_t>(0x0)};
+  const wchar_t *cur = src;
+  char mbs[11];
+  // a 7 byte dest limit stops the conversion before the invalid widechar
+  ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 7, &state),
+            static_cast<size_t>(7));
+  ASSERT_ERRNO_SUCCESS();
+
+  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+  cur = src; // restart from the beginning with room to reach the bad widechar
+  ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 11, &state),
+            static_cast<size_t>(-1));
+  ASSERT_ERRNO_EQ(EILSEQ);
+}
diff --git a/libc/test/src/wchar/wcsrtombs_test.cpp b/libc/test/src/wchar/wcsrtombs_test.cpp
index 84d0bc3b40980..00879ee64e25a 100644
--- a/libc/test/src/wchar/wcsrtombs_test.cpp
+++ b/libc/test/src/wchar/wcsrtombs_test.cpp
@@ -14,6 +14,9 @@
using LlvmLibcWcsrtombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+// these tests are fairly simple as this function just calls into the internal
+// wcsnrtombs which is more thoroughly tested
+
TEST_F(LlvmLibcWcsrtombs, AllMultibyteLengths) {
mbstate_t state;
LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
@@ -73,6 +76,7 @@ TEST_F(LlvmLibcWcsrtombs, DestLimit) {
LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
// not enough bytes to convert the second character, so only converts one
+ cur = src;
ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 6, &state),
static_cast<size_t>(4));
ASSERT_ERRNO_SUCCESS();
@@ -84,29 +88,6 @@ TEST_F(LlvmLibcWcsrtombs, DestLimit) {
ASSERT_EQ(mbs[4], '\x01');
}
-TEST_F(LlvmLibcWcsrtombs, NullDest) {
- mbstate_t state;
- LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
-
- const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
- static_cast<wchar_t>(0x2211),
- static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
- static_cast<wchar_t>(0x0)};
- const wchar_t *cur = src;
-
- // n parameter ignored when dest is null
- ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(nullptr, &cur, 1, &state),
- static_cast<size_t>(10));
- ASSERT_ERRNO_SUCCESS();
- ASSERT_EQ(cur, nullptr);
-
- LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
- ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(nullptr, &cur, 100, &state),
- static_cast<size_t>(10));
- ASSERT_ERRNO_SUCCESS();
- ASSERT_EQ(cur, nullptr);
-}
-
TEST_F(LlvmLibcWcsrtombs, ErrnoTest) {
mbstate_t state;
LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
@@ -116,14 +97,17 @@ TEST_F(LlvmLibcWcsrtombs, ErrnoTest) {
static_cast<wchar_t>(0x12ffff), // invalid widechar
static_cast<wchar_t>(0x0)};
const wchar_t *cur = src;
+ char mbs[11];
// n parameter ignored when dest is null
- ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(nullptr, &cur, 7, &state),
+ ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 7, &state),
static_cast<size_t>(7));
ASSERT_ERRNO_SUCCESS();
LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
- ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(nullptr, &cur, 100, &state),
+ cur = src;
+
+ ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 100, &state),
static_cast<size_t>(-1));
ASSERT_ERRNO_EQ(EILSEQ);
-}
\ No newline at end of file
+}
diff --git a/libc/test/src/wchar/wcstombs_test.cpp b/libc/test/src/wchar/wcstombs_test.cpp
index 25ff4baa2e8cd..397ce0460806f 100644
--- a/libc/test/src/wchar/wcstombs_test.cpp
+++ b/libc/test/src/wchar/wcstombs_test.cpp
@@ -12,6 +12,9 @@
using LlvmLibcWcstombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+// these tests are fairly simple as this function just calls into the internal
+// wcsnrtombs which is more thoroughly tested
+
TEST_F(LlvmLibcWcstombs, AllMultibyteLengths) {
/// clown emoji, sigma symbol, y with diaeresis, letter A
const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
@@ -66,28 +69,17 @@ TEST_F(LlvmLibcWcstombs, DestLimit) {
ASSERT_EQ(mbs[4], '\x01');
}
-TEST_F(LlvmLibcWcstombs, NullDest) {
- const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
- static_cast<wchar_t>(0x2211),
- static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
- static_cast<wchar_t>(0x0)};
-
- // n parameter ignored when dest is null
- ASSERT_EQ(LIBC_NAMESPACE::wcstombs(nullptr, src, 1), static_cast<size_t>(10));
- ASSERT_ERRNO_SUCCESS();
- ASSERT_EQ(LIBC_NAMESPACE::wcstombs(nullptr, src, 100), static_cast<size_t>(10));
- ASSERT_ERRNO_SUCCESS();
-}
-
TEST_F(LlvmLibcWcstombs, ErrnoTest) {
const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
static_cast<wchar_t>(0x2211),
static_cast<wchar_t>(0x12ffff), // invalid widechar
static_cast<wchar_t>(0x0)};
+ char mbs[11];
// n parameter ignored when dest is null
- ASSERT_EQ(LIBC_NAMESPACE::wcstombs(nullptr, src, 7), static_cast<size_t>(7));
+ ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 7), static_cast<size_t>(7));
ASSERT_ERRNO_SUCCESS();
- ASSERT_EQ(LIBC_NAMESPACE::wcstombs(nullptr, src, 100), static_cast<size_t>(-1));
+ ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 100),
+ static_cast<size_t>(-1));
ASSERT_ERRNO_EQ(EILSEQ);
-}
\ No newline at end of file
+}
>From f01702a10634c7b106f348357f99c7fbdf736586 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Thu, 17 Jul 2025 22:49:26 +0000
Subject: [PATCH 08/14] yaml typo
---
libc/include/wchar.yaml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 226f1614ebcc2..8c72e1963a425 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -297,7 +297,7 @@ functions:
return_type: size_t
arguments:
- type: char *__restrict
- - type: const wchar_t *__restrict
+ - type: const wchar_t **__restrict
- type: size_t
- type: size_t
- type: mbstate_t
>From ee62e624e03b63005b3ba7ccf3ce3a1e982344b7 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Thu, 17 Jul 2025 22:53:22 +0000
Subject: [PATCH 09/14] typo
---
libc/src/__support/wchar/wcsnrtombs.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h
index 972fbf97e52f8..f5ba910940692 100644
--- a/libc/src/__support/wchar/wcsnrtombs.h
+++ b/libc/src/__support/wchar/wcsnrtombs.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
-#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
+#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H
+#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
@@ -24,4 +24,4 @@ ErrorOr<size_t> wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs,
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
-#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H
+#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H
>From 1c1a7f981bbbc4ff8a50a39ed9ac421f61f06ea7 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 18 Jul 2025 16:52:42 +0000
Subject: [PATCH 10/14] fixed behavior when dest=null (shouldnt update src
pointer)
---
libc/src/__support/wchar/wcsnrtombs.cpp | 7 +++++--
libc/test/src/__support/wchar/wcsnrtombs_test.cpp | 5 ++---
2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/libc/src/__support/wchar/wcsnrtombs.cpp b/libc/src/__support/wchar/wcsnrtombs.cpp
index b7add66214b5a..65c85cffa55c7 100644
--- a/libc/src/__support/wchar/wcsnrtombs.cpp
+++ b/libc/src/__support/wchar/wcsnrtombs.cpp
@@ -38,7 +38,8 @@ ErrorOr<size_t> wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs,
s[dst_idx] = converted.value();
if (converted.value() == '\0') {
- *pwcs = nullptr;
+ if (s != nullptr)
+ *pwcs = nullptr;
return dst_idx;
}
@@ -46,7 +47,9 @@ ErrorOr<size_t> wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs,
converted = str_conv.popUTF8();
}
- *pwcs += str_conv.getSourceIndex();
+ if (s != nullptr)
+ *pwcs += str_conv.getSourceIndex();
+
if (converted.error() == -1) // if we hit conversion limit
return dst_idx;
diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
index 710fc4b568ac0..bec23638e19b7 100644
--- a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
+++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
@@ -140,14 +140,13 @@ TEST(LlvmLibcWcsnrtombs, NullDest) {
auto res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 1, &state1);
ASSERT_TRUE(res.has_value());
ASSERT_EQ(res.value(), static_cast<size_t>(10));
- ASSERT_EQ(cur, nullptr);
+ ASSERT_EQ(cur, src); // pointer not updated when dest = null
LIBC_NAMESPACE::internal::mbstate state2;
- cur = src;
res = LIBC_NAMESPACE::internal::wcsnrtombs(nullptr, &cur, 5, 100, &state2);
ASSERT_TRUE(res.has_value());
ASSERT_EQ(res.value(), static_cast<size_t>(10));
- ASSERT_EQ(cur, nullptr);
+ ASSERT_EQ(cur, src);
}
TEST(LlvmLibcWcsnrtombs, InvalidState) {
>From 61be17f4facde976b16f4a6f37e78db208479d15 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 18 Jul 2025 18:14:07 +0000
Subject: [PATCH 11/14] made internal function header only
---
libc/src/__support/wchar/CMakeLists.txt | 4 +-
libc/src/__support/wchar/wcsnrtombs.cpp | 59 -------------------------
libc/src/__support/wchar/wcsnrtombs.h | 40 ++++++++++++++++-
3 files changed, 39 insertions(+), 64 deletions(-)
delete mode 100644 libc/src/__support/wchar/wcsnrtombs.cpp
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index b9efe5888d955..a3d06ac0dbe6a 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -69,12 +69,10 @@ add_object_library(
.mbstate
)
-add_object_library(
+add_header_library(
wcsnrtombs
HDRS
wcsnrtombs.h
- SRCS
- wcsnrtombs.cpp
DEPENDS
libc.hdr.errno_macros
libc.hdr.types.char8_t
diff --git a/libc/src/__support/wchar/wcsnrtombs.cpp b/libc/src/__support/wchar/wcsnrtombs.cpp
deleted file mode 100644
index 65c85cffa55c7..0000000000000
--- a/libc/src/__support/wchar/wcsnrtombs.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- Implementation of wcsnrtombs --------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/__support/wchar/wcsnrtombs.h"
-
-#include "hdr/types/char32_t.h"
-#include "hdr/types/size_t.h"
-#include "hdr/types/wchar_t.h"
-#include "src/__support/common.h"
-#include "src/__support/libc_errno.h"
-#include "src/__support/macros/config.h"
-#include "src/__support/wchar/mbstate.h"
-#include "src/__support/wchar/string_converter.h"
-
-namespace LIBC_NAMESPACE_DECL {
-namespace internal {
-
-ErrorOr<size_t> wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs,
- size_t nwc, size_t len, mbstate *ps) {
- CharacterConverter cr(ps);
- if (!cr.isValidState())
- return Error(EINVAL);
-
- if (s == nullptr)
- len = SIZE_MAX;
-
- StringConverter<char32_t> str_conv(reinterpret_cast<const char32_t *>(*pwcs),
- ps, len, nwc);
- size_t dst_idx = 0;
- ErrorOr<char8_t> converted = str_conv.popUTF8();
- while (converted.has_value()) {
- if (s != nullptr)
- s[dst_idx] = converted.value();
-
- if (converted.value() == '\0') {
- if (s != nullptr)
- *pwcs = nullptr;
- return dst_idx;
- }
-
- dst_idx++;
- converted = str_conv.popUTF8();
- }
-
- if (s != nullptr)
- *pwcs += str_conv.getSourceIndex();
-
- if (converted.error() == -1) // if we hit conversion limit
- return dst_idx;
-
- return Error(converted.error());
-}
-} // namespace internal
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h
index f5ba910940692..4db9dae0b6ad0 100644
--- a/libc/src/__support/wchar/wcsnrtombs.h
+++ b/libc/src/__support/wchar/wcsnrtombs.h
@@ -9,17 +9,53 @@
#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H
#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCSNRTOMBS_H
+#include "hdr/types/char32_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
-#include "src/__support/error_or.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/string_converter.h"
namespace LIBC_NAMESPACE_DECL {
namespace internal {
ErrorOr<size_t> wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs,
- size_t nwc, size_t len, mbstate *ps);
+ size_t nwc, size_t len, mbstate *ps) {
+ CharacterConverter cr(ps);
+ if (!cr.isValidState())
+ return Error(EINVAL);
+
+ if (s == nullptr)
+ len = SIZE_MAX;
+
+ StringConverter<char32_t> str_conv(reinterpret_cast<const char32_t *>(*pwcs),
+ ps, len, nwc);
+ size_t dst_idx = 0;
+ ErrorOr<char8_t> converted = str_conv.popUTF8();
+ while (converted.has_value()) {
+ if (s != nullptr)
+ s[dst_idx] = converted.value();
+
+ if (converted.value() == '\0') {
+ if (s != nullptr)
+ *pwcs = nullptr;
+ return dst_idx;
+ }
+
+ dst_idx++;
+ converted = str_conv.popUTF8();
+ }
+
+ if (s != nullptr)
+ *pwcs += str_conv.getSourceIndex();
+
+ if (converted.error() == -1) // if we hit conversion limit
+ return dst_idx;
+
+ return Error(converted.error());
+}
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
>From 2af900190385b24d1eeacbb71ef30d9986b9b914 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 18 Jul 2025 18:18:19 +0000
Subject: [PATCH 12/14] formatting
---
libc/src/__support/wchar/wcsnrtombs.h | 2 +-
libc/src/wchar/wcsnrtombs.h | 5 +++--
libc/src/wchar/wcsrtombs.h | 5 +++--
libc/test/src/__support/wchar/wcsnrtombs_test.cpp | 3 ++-
4 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h
index 4db9dae0b6ad0..d4b5c5a9b5e4e 100644
--- a/libc/src/__support/wchar/wcsnrtombs.h
+++ b/libc/src/__support/wchar/wcsnrtombs.h
@@ -50,7 +50,7 @@ ErrorOr<size_t> wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs,
if (s != nullptr)
*pwcs += str_conv.getSourceIndex();
-
+
if (converted.error() == -1) // if we hit conversion limit
return dst_idx;
diff --git a/libc/src/wchar/wcsnrtombs.h b/libc/src/wchar/wcsnrtombs.h
index 793d383660f1b..bf8add75b2951 100644
--- a/libc/src/wchar/wcsnrtombs.h
+++ b/libc/src/wchar/wcsnrtombs.h
@@ -1,4 +1,4 @@
-//===-- Implementation header for wcsnrtombs -------------------------------===//
+//===-- Implementation header for wcsnrtombs ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -16,7 +16,8 @@
namespace LIBC_NAMESPACE_DECL {
-size_t wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs, size_t nwc, size_t len, mbstate_t* ps);
+size_t wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs,
+ size_t nwc, size_t len, mbstate_t *ps);
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsrtombs.h b/libc/src/wchar/wcsrtombs.h
index af69fccdb296a..d23573f5b9418 100644
--- a/libc/src/wchar/wcsrtombs.h
+++ b/libc/src/wchar/wcsrtombs.h
@@ -1,4 +1,4 @@
-//===-- Implementation header for wcsrtombs --------------------------------===//
+//===-- Implementation header for wcsrtombs -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -16,7 +16,8 @@
namespace LIBC_NAMESPACE_DECL {
-size_t wcsrtombs(char *__restrict s, const wchar_t **__restrict pwcs, size_t n, mbstate_t* ps);
+size_t wcsrtombs(char *__restrict s, const wchar_t **__restrict pwcs, size_t n,
+ mbstate_t *ps);
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
index bec23638e19b7..3df7b07f90f47 100644
--- a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
+++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
@@ -110,7 +110,8 @@ TEST(LlvmLibcWcsnrtombs, SrcLimit) {
ASSERT_EQ(mbs[6], '\x91');
ASSERT_EQ(mbs[7], '\x01');
- res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs + res.value(), &cur, 100, 11, &state);
+ res = LIBC_NAMESPACE::internal::wcsnrtombs(mbs + res.value(), &cur, 100, 11,
+ &state);
ASSERT_TRUE(res.has_value());
ASSERT_EQ(res.value(), static_cast<size_t>(3));
ASSERT_EQ(cur, nullptr);
>From f60a0aef681c73784ead1895d77ec65312f02952 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 18 Jul 2025 18:22:08 +0000
Subject: [PATCH 13/14] format
---
libc/test/src/wchar/wcstombs_test.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/libc/test/src/wchar/wcstombs_test.cpp b/libc/test/src/wchar/wcstombs_test.cpp
index 397ce0460806f..61e0873dc9711 100644
--- a/libc/test/src/wchar/wcstombs_test.cpp
+++ b/libc/test/src/wchar/wcstombs_test.cpp
@@ -79,7 +79,6 @@ TEST_F(LlvmLibcWcstombs, ErrnoTest) {
// n parameter ignored when dest is null
ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 7), static_cast<size_t>(7));
ASSERT_ERRNO_SUCCESS();
- ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 100),
- static_cast<size_t>(-1));
+ ASSERT_EQ(LIBC_NAMESPACE::wcstombs(mbs, src, 100), static_cast<size_t>(-1));
ASSERT_ERRNO_EQ(EILSEQ);
}
>From 924f32f066e06e0937ae58c0adaa7147c01c5c29 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 18 Jul 2025 19:46:41 +0000
Subject: [PATCH 14/14] added tests for src == null and mbstate == null
---
libc/src/__support/wchar/wcsnrtombs.h | 10 ++++-
libc/src/wchar/wcsnrtombs.cpp | 1 +
libc/src/wchar/wcsrtombs.cpp | 1 +
libc/src/wchar/wcstombs.cpp | 1 +
.../src/__support/wchar/wcsnrtombs_test.cpp | 10 +++++
libc/test/src/wchar/wcsnrtombs_test.cpp | 40 +++++++++++++++++++
libc/test/src/wchar/wcsrtombs_test.cpp | 40 +++++++++++++++++++
7 files changed, 101 insertions(+), 2 deletions(-)
diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h
index d4b5c5a9b5e4e..cf2eda1d2c284 100644
--- a/libc/src/__support/wchar/wcsnrtombs.h
+++ b/libc/src/__support/wchar/wcsnrtombs.h
@@ -15,14 +15,20 @@
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
#include "src/__support/wchar/mbstate.h"
#include "src/__support/wchar/string_converter.h"
namespace LIBC_NAMESPACE_DECL {
namespace internal {
-ErrorOr<size_t> wcsnrtombs(char *__restrict s, const wchar_t **__restrict pwcs,
- size_t nwc, size_t len, mbstate *ps) {
+LIBC_INLINE static ErrorOr<size_t> wcsnrtombs(char *__restrict s,
+ const wchar_t **__restrict pwcs,
+ size_t nwc, size_t len,
+ mbstate *ps) {
+ LIBC_CRASH_ON_NULLPTR(pwcs);
+ LIBC_CRASH_ON_NULLPTR(ps);
+
CharacterConverter cr(ps);
if (!cr.isValidState())
return Error(EINVAL);
diff --git a/libc/src/wchar/wcsnrtombs.cpp b/libc/src/wchar/wcsnrtombs.cpp
index fd4724150e927..7f25b248a0863 100644
--- a/libc/src/wchar/wcsnrtombs.cpp
+++ b/libc/src/wchar/wcsnrtombs.cpp
@@ -23,6 +23,7 @@ namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(size_t, wcsnrtombs,
(char *__restrict s, const wchar_t **__restrict pwcs,
size_t nwc, size_t len, mbstate_t *ps)) {
+ LIBC_CRASH_ON_NULLPTR(pwcs);
static internal::mbstate internal_mbstate;
auto result = internal::wcsnrtombs(
s, pwcs, nwc, len,
diff --git a/libc/src/wchar/wcsrtombs.cpp b/libc/src/wchar/wcsrtombs.cpp
index b4632a4a436ba..9d2508cb81a8c 100644
--- a/libc/src/wchar/wcsrtombs.cpp
+++ b/libc/src/wchar/wcsrtombs.cpp
@@ -23,6 +23,7 @@ namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(size_t, wcsrtombs,
(char *__restrict s, const wchar_t **__restrict pwcs,
size_t n, mbstate_t *ps)) {
+ LIBC_CRASH_ON_NULLPTR(pwcs);
static internal::mbstate internal_mbstate;
auto result = internal::wcsnrtombs(
s, pwcs, SIZE_MAX, n,
diff --git a/libc/src/wchar/wcstombs.cpp b/libc/src/wchar/wcstombs.cpp
index 28e2425d645e7..c3793cbe912cd 100644
--- a/libc/src/wchar/wcstombs.cpp
+++ b/libc/src/wchar/wcstombs.cpp
@@ -22,6 +22,7 @@ namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(size_t, wcstombs,
(char *__restrict s, const wchar_t *__restrict wcs,
size_t n)) {
+ LIBC_CRASH_ON_NULLPTR(wcs);
static internal::mbstate internal_mbstate;
const wchar_t *wcs_ptr_copy = wcs;
auto result =
diff --git a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
index 3df7b07f90f47..33d28d791b18c 100644
--- a/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
+++ b/libc/test/src/__support/wchar/wcsnrtombs_test.cpp
@@ -188,3 +188,13 @@ TEST(LlvmLibcWcsnrtombs, InvalidCharacter) {
ASSERT_FALSE(res.has_value());
ASSERT_EQ(res.error(), EILSEQ);
}
+
+TEST(LlvmLibcWcsnrtombs, NullSrc) { // a null pwcs is expected to crash
+ EXPECT_DEATH(
+ [] {
+ LIBC_NAMESPACE::internal::mbstate state;
+ char mbs[10];
+ LIBC_NAMESPACE::internal::wcsnrtombs(mbs, nullptr, 1, 1, &state);
+ },
+ WITH_SIGNAL(-1)); // NOTE(review): -1 presumably means "any signal"; confirm
+}
\ No newline at end of file
diff --git a/libc/test/src/wchar/wcsnrtombs_test.cpp b/libc/test/src/wchar/wcsnrtombs_test.cpp
index f6a333964018d..73e478fcdd256 100644
--- a/libc/test/src/wchar/wcsnrtombs_test.cpp
+++ b/libc/test/src/wchar/wcsnrtombs_test.cpp
@@ -154,3 +154,43 @@ TEST_F(LlvmLibcWcsnrtombs, ErrnoTest) {
static_cast<size_t>(-1));
ASSERT_ERRNO_EQ(EILSEQ);
}
+
+TEST_F(LlvmLibcWcsnrtombs, NullState) {
+ // Same scenario as DestLimit, but with a nullptr mbstate_t* argument.
+
+ // U+1F921 clown face, U+2211 summation sign, U+00FF y with diaeresis, 'A'
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+ static_cast<wchar_t>(0x0)};
+ const wchar_t *cur = src;
+
+ char mbs[11];
+ for (int i = 0; i < 11; ++i)
+ mbs[i] = '\x01'; // sentinel, so untouched bytes can be detected below
+
+ ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 4, nullptr),
+ static_cast<size_t>(4));
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_EQ(cur, src + 1); // source pointer advanced past one wide character
+ ASSERT_EQ(mbs[0], '\xF0');
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\x01'); // sentinel intact: nothing written past 4 bytes
+
+ for (int i = 0; i < 11; ++i)
+ mbs[i] = '\x01'; // reset the sentinel before the second call
+
+ // 6 bytes hold the 4-byte emoji but not the 3-byte U+2211, so one converts
+ cur = src;
+ ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 6, nullptr),
+ static_cast<size_t>(4));
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_EQ(cur, src + 1); // still only the first wide character was consumed
+ ASSERT_EQ(mbs[0], '\xF0');
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\x01'); // sentinel intact: no partial second character
+}
diff --git a/libc/test/src/wchar/wcsrtombs_test.cpp b/libc/test/src/wchar/wcsrtombs_test.cpp
index 00879ee64e25a..a18d2438476ea 100644
--- a/libc/test/src/wchar/wcsrtombs_test.cpp
+++ b/libc/test/src/wchar/wcsrtombs_test.cpp
@@ -111,3 +111,43 @@ TEST_F(LlvmLibcWcsrtombs, ErrnoTest) {
static_cast<size_t>(-1));
ASSERT_ERRNO_EQ(EILSEQ);
}
+
+TEST_F(LlvmLibcWcsrtombs, NullState) {
+ // Same scenario as DestLimit, but with a nullptr mbstate_t* argument.
+
+ // U+1F921 clown face, U+2211 summation sign, U+00FF y with diaeresis, 'A'
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+ static_cast<wchar_t>(0x0)};
+ const wchar_t *cur = src;
+
+ char mbs[11];
+ for (int i = 0; i < 11; ++i)
+ mbs[i] = '\x01'; // sentinel, so untouched bytes can be detected below
+
+ ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 4, nullptr),
+ static_cast<size_t>(4));
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_EQ(cur, src + 1); // source pointer advanced past one wide character
+ ASSERT_EQ(mbs[0], '\xF0');
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\x01'); // sentinel intact: nothing written past 4 bytes
+
+ for (int i = 0; i < 11; ++i)
+ mbs[i] = '\x01'; // reset the sentinel before the second call
+
+ // 6 bytes hold the 4-byte emoji but not the 3-byte U+2211, so one converts
+ cur = src;
+ ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 6, nullptr),
+ static_cast<size_t>(4));
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_EQ(cur, src + 1); // still only the first wide character was consumed
+ ASSERT_EQ(mbs[0], '\xF0');
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\x01'); // sentinel intact: no partial second character
+}
More information about the libc-commits
mailing list