[libc-commits] [libc] [libc] Implement wcs to mbs family of functions (PR #149421)
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Tue Jul 22 09:48:14 PDT 2025
================
@@ -0,0 +1,154 @@
+//===-- Unittests for wcsrtombs -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/mbstate_t.h"
+#include "src/__support/macros/null_check.h"
+#include "src/string/memset.h"
+#include "src/wchar/wcsrtombs.h"
+#include "test/UnitTest/ErrnoCheckingTest.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcWcsrtombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
+
+// these tests are fairly simple as this function just calls into the internal
+// wcsnrtombs which is more thoroughly tested
+
+TEST_F(LlvmLibcWcsrtombs, AllMultibyteLengths) {
+ mbstate_t state;
+ LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+ /// clown emoji, sigma symbol, y with diaeresis, letter A
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+ static_cast<wchar_t>(0x0)};
+ const wchar_t *cur = src;
+ char mbs[11];
+
+ ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 11, &state),
+ static_cast<size_t>(10));
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_EQ(cur, nullptr);
+ ASSERT_EQ(mbs[0], '\xF0'); // clown begin
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
+ ASSERT_EQ(mbs[5], '\x88');
+ ASSERT_EQ(mbs[6], '\x91');
+ ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin
+ ASSERT_EQ(mbs[8], '\xBF');
+ ASSERT_EQ(mbs[9], '\x41'); // A begin
+ ASSERT_EQ(mbs[10], '\0'); // null terminator
+}
+
+TEST_F(LlvmLibcWcsrtombs, DestLimit) {
+ mbstate_t state;
+ LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+ /// clown emoji, sigma symbol, y with diaeresis, letter A
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+ static_cast<wchar_t>(0x0)};
+ const wchar_t *cur = src;
+
+ char mbs[11];
+ for (int i = 0; i < 11; ++i)
+ mbs[i] = '\x01'; // dummy initial values
+
+ ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 4, &state),
+ static_cast<size_t>(4));
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_EQ(cur, src + 1);
+ ASSERT_EQ(mbs[0], '\xF0');
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes
+
+ for (int i = 0; i < 11; ++i)
+ mbs[i] = '\x01'; // dummy initial values
+ LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+ // not enough bytes to convert the second character, so only converts one
+ cur = src;
+ ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 6, &state),
+ static_cast<size_t>(4));
+ ASSERT_ERRNO_SUCCESS();
+ ASSERT_EQ(cur, src + 1);
+ ASSERT_EQ(mbs[0], '\xF0');
+ ASSERT_EQ(mbs[1], '\x9F');
+ ASSERT_EQ(mbs[2], '\xA4');
+ ASSERT_EQ(mbs[3], '\xA1');
+ ASSERT_EQ(mbs[4], '\x01');
+}
+
+TEST_F(LlvmLibcWcsrtombs, ErrnoTest) {
+ mbstate_t state;
+ LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0x12ffff), // invalid widechar
+ static_cast<wchar_t>(0x0)};
+ const wchar_t *cur = src;
+ char mbs[11];
+
+ // n parameter ignored when dest is null
+ ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 7, &state),
+ static_cast<size_t>(7));
+ ASSERT_ERRNO_SUCCESS();
+
+ LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
+ cur = src;
+
+ ASSERT_EQ(LIBC_NAMESPACE::wcsrtombs(mbs, &cur, 100, &state),
+ static_cast<size_t>(-1));
+ ASSERT_ERRNO_EQ(EILSEQ);
+}
+
+TEST_F(LlvmLibcWcsrtombs, NullState) {
+ // this test is the same as DestLimit except it uses a nullptr mbstate*
+
+ /// clown emoji, sigma symbol, y with diaeresis, letter A
+ const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
+ static_cast<wchar_t>(0x2211),
+ static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
+ static_cast<wchar_t>(0x0)};
+ const wchar_t *cur = src;
+
+ char mbs[11];
+ for (int i = 0; i < 11; ++i)
+ mbs[i] = '\x01'; // dummy initial values
----------------
michaelrj-google wrote:
since you use memset for the mbstate you can also use it here for the multibyte string
https://github.com/llvm/llvm-project/pull/149421
More information about the libc-commits
mailing list