[libc-commits] [libc] [libc] utf8 to 32 CharacterConverter (PR #143973)
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Thu Jun 12 15:11:40 PDT 2025
================
@@ -0,0 +1,125 @@
+//===-- Unittests for character_converter utf8->32 ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/wchar/character_converter.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/utf_ret.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, OneByte) {
+ LIBC_NAMESPACE::internal::mbstate state;
+ state.bytes_processed = 0;
+ state.total_bytes = 0;
+ char ch = 'A';
+
+ LIBC_NAMESPACE::internal::CharacterConverter char_conv(&state);
+ int err = char_conv.push(static_cast<char8_t>(ch));
+ LIBC_NAMESPACE::internal::utf_ret<char32_t> wch = char_conv.pop_utf32();
+
+ EXPECT_EQ(err, 0);
+ EXPECT_EQ(wch.error, 0);
+ EXPECT_EQ(static_cast<int>(wch.out), 65);
+}
+
+TEST(LlvmLibcCharacterConverterUTF8To32Test, TwoBytes) {
+ LIBC_NAMESPACE::internal::mbstate state;
+ state.bytes_processed = 0;
+ state.total_bytes = 0;
+ const char *ch = ""; // hex 0xC2, 0x8E
----------------
michaelrj-google wrote:
For a character that may not be printable for some users, it's better to write out the bytes directly. Here and below.
```suggestion
const char *ch = {0xC2, 0x8E}; // hex 0xC2, 0x8E
```
https://github.com/llvm/llvm-project/pull/143973
More information about the libc-commits
mailing list