[libc-commits] [libc] 52583b3 - [libc] Character converter skeleton class (#143619)
via libc-commits
libc-commits at lists.llvm.org
Wed Jun 11 13:11:34 PDT 2025
Author: Uzair Nawaz
Date: 2025-06-11T13:11:31-07:00
New Revision: 52583b3ed7dd39788360361fc1e21039c8eb5479
URL: https://github.com/llvm/llvm-project/commit/52583b3ed7dd39788360361fc1e21039c8eb5479
DIFF: https://github.com/llvm/llvm-project/commit/52583b3ed7dd39788360361fc1e21039c8eb5479.diff
LOG: [libc] Character converter skeleton class (#143619)
Made CharacterConverter class skeleton
Added:
libc/hdr/types/char32_t.h
libc/hdr/types/char8_t.h
libc/hdr/uchar_overlay.h
libc/src/__support/wchar/CMakeLists.txt
libc/src/__support/wchar/character_converter.cpp
libc/src/__support/wchar/character_converter.h
libc/src/__support/wchar/mbstate.h
libc/src/__support/wchar/utf_ret.h
Modified:
Removed:
################################################################################
diff --git a/libc/hdr/types/char32_t.h b/libc/hdr/types/char32_t.h
new file mode 100644
index 0000000000000..94fe5747d3415
--- /dev/null
+++ b/libc/hdr/types/char32_t.h
@@ -0,0 +1,22 @@
+//===-- Definition of char32_t.h ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_CHAR32_T_H
+#define LLVM_LIBC_HDR_TYPES_CHAR32_T_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/char32_t.h"
+
+#else // overlay mode
+
+#include "hdr/uchar_overlay.h"
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_CHAR32_T_H
diff --git a/libc/hdr/types/char8_t.h b/libc/hdr/types/char8_t.h
new file mode 100644
index 0000000000000..31de764658f9e
--- /dev/null
+++ b/libc/hdr/types/char8_t.h
@@ -0,0 +1,22 @@
+//===-- Definition of char8_t.h -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_CHAR8_T_H
+#define LLVM_LIBC_HDR_TYPES_CHAR8_T_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/char8_t.h"
+
+#else // overlay mode
+
+#include "hdr/uchar_overlay.h"
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_CHAR8_T_H
diff --git a/libc/hdr/uchar_overlay.h b/libc/hdr/uchar_overlay.h
new file mode 100644
index 0000000000000..44ed3d48c6c1d
--- /dev/null
+++ b/libc/hdr/uchar_overlay.h
@@ -0,0 +1,69 @@
+//===-- Including uchar.h in overlay mode ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_UCHAR_OVERLAY_H
+#define LLVM_LIBC_HDR_UCHAR_OVERLAY_H
+
+#ifdef LIBC_FULL_BUILD
+#error "This header should only be included in overlay mode"
+#endif
+
+// Overlay mode
+
+// glibc's <uchar.h> header might provide extern inline definitions for a few
+// functions, causing external alias errors. They are guarded by the
+// `__USE_EXTERN_INLINES` macro, which we temporarily disable by defining
+// `__NO_INLINE__` before including <uchar.h>.
+// Likewise, `__USE_FORTIFY_LEVEL` is temporarily disabled by undefining
+// `_FORTIFY_SOURCE`.
+
+// If _FORTIFY_SOURCE is defined, leave a LIBC_OLD_FORTIFY_SOURCE marker behind
+// and remove _FORTIFY_SOURCE for the duration of the include below.
+#ifdef _FORTIFY_SOURCE
+#define LIBC_OLD_FORTIFY_SOURCE _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+
+// Define __NO_INLINE__ only when it is not already set. LIBC_SET_NO_INLINE
+// records that this header is the one that defined it.
+#ifndef __NO_INLINE__
+#define __NO_INLINE__ 1
+#define LIBC_SET_NO_INLINE
+#endif
+
+// Drop __USE_EXTERN_INLINES if present, leaving a marker so it can be
+// re-defined after the include.
+#ifdef __USE_EXTERN_INLINES
+#define LIBC_OLD_USE_EXTERN_INLINES
+#undef __USE_EXTERN_INLINES
+#endif
+
+// If __USE_FORTIFY_LEVEL is already defined, force it to 0 for the include.
+#ifdef __USE_FORTIFY_LEVEL
+#define LIBC_OLD_USE_FORTIFY_LEVEL __USE_FORTIFY_LEVEL
+#undef __USE_FORTIFY_LEVEL
+#define __USE_FORTIFY_LEVEL 0
+#endif
+
+#include <uchar.h>
+
+// Re-define _FORTIFY_SOURCE if it was removed before the include.
+// NOTE(review): the replacement text is the name LIBC_OLD_FORTIFY_SOURCE, which
+// is #undef'd on the next line, so later expansions of _FORTIFY_SOURCE yield
+// that bare identifier rather than the earlier value. Confirm this is intended.
+#ifdef LIBC_OLD_FORTIFY_SOURCE
+#define _FORTIFY_SOURCE LIBC_OLD_FORTIFY_SOURCE
+#undef LIBC_OLD_FORTIFY_SOURCE
+#endif
+
+// Remove __NO_INLINE__ again, but only if this header defined it.
+#ifdef LIBC_SET_NO_INLINE
+#undef __NO_INLINE__
+#undef LIBC_SET_NO_INLINE
+#endif
+
+// Replace the temporary __USE_FORTIFY_LEVEL definition.
+// NOTE(review): as with _FORTIFY_SOURCE, the replacement text names
+// LIBC_OLD_USE_FORTIFY_LEVEL, which is #undef'd right after. Confirm intended.
+#ifdef LIBC_OLD_USE_FORTIFY_LEVEL
+#undef __USE_FORTIFY_LEVEL
+#define __USE_FORTIFY_LEVEL LIBC_OLD_USE_FORTIFY_LEVEL
+#undef LIBC_OLD_USE_FORTIFY_LEVEL
+#endif
+
+// Re-define __USE_EXTERN_INLINES (with empty replacement text) if it was
+// removed before the include.
+#ifdef LIBC_OLD_USE_EXTERN_INLINES
+#define __USE_EXTERN_INLINES
+#undef LIBC_OLD_USE_EXTERN_INLINES
+#endif
+
+#endif // LLVM_LIBC_HDR_UCHAR_OVERLAY_H
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
new file mode 100644
index 0000000000000..5cca58400ff45
--- /dev/null
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -0,0 +1,26 @@
+# Header-only target for mbstate.h; depends on the char32_t type header.
+add_header_library(
+ mbstate
+ HDRS
+ mbstate.h
+ DEPENDS
+ libc.hdr.types.char32_t
+)
+
+# Object library built from character_converter.cpp. It depends on the char8_t
+# and char32_t type headers and on the mbstate and utf_ret targets declared in
+# this same file.
+add_object_library(
+ character_converter
+ HDRS
+ character_converter.h
+ SRCS
+ character_converter.cpp
+ DEPENDS
+ libc.hdr.types.char8_t
+ libc.hdr.types.char32_t
+ .mbstate
+ .utf_ret
+)
+
+# Header-only target for utf_ret.h; it lists no dependencies.
+add_header_library(
+ utf_ret
+ HDRS
+ utf_ret.h
+)
diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
new file mode 100644
index 0000000000000..0afc2a6f59e64
--- /dev/null
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -0,0 +1,32 @@
+//===-- Implementation of a class for conversion --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/char32_t.h"
+#include "hdr/types/char8_t.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/utf_ret.h"
+
+#include "character_converter.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Stores the caller-supplied state pointer; no other work is done.
+CharacterConverter::CharacterConverter(mbstate_t *mbstate) : state(mbstate) {}
+
+// TODO: not implemented yet. Returns a fixed placeholder (false) so that
+// control does not run off the end of a non-void function, which is undefined
+// behavior in C++.
+bool CharacterConverter::isComplete() { return false; }
+
+// TODO: not implemented yet; `utf8_byte` is ignored. Returns a fixed
+// placeholder (0) so that control does not run off the end of a non-void
+// function, which is undefined behavior in C++.
+int CharacterConverter::push(char8_t utf8_byte) { return 0; }
+
+// TODO: not implemented yet; `utf32` is ignored. Returns a fixed placeholder
+// (0) so that control does not run off the end of a non-void function, which
+// is undefined behavior in C++.
+int CharacterConverter::push(char32_t utf32) { return 0; }
+
+// TODO: not implemented yet. Returns a value-initialized utf_ret (all fields
+// zero) as a placeholder so that control does not run off the end of a
+// non-void function, which is undefined behavior in C++.
+utf_ret<char8_t> CharacterConverter::pop_utf8() { return {}; }
+
+// TODO: not implemented yet. Returns a value-initialized utf_ret (all fields
+// zero) as a placeholder so that control does not run off the end of a
+// non-void function, which is undefined behavior in C++.
+utf_ret<char32_t> CharacterConverter::pop_utf32() { return {}; }
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h
new file mode 100644
index 0000000000000..a6bac43805376
--- /dev/null
+++ b/libc/src/__support/wchar/character_converter.h
@@ -0,0 +1,39 @@
+//===-- Definition of a class for mbstate_t and conversion -----*-- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
+#define LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
+
+#include "hdr/types/char32_t.h"
+#include "hdr/types/char8_t.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/utf_ret.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Interface of the character converter. Judging by the member names it
+// converts between UTF-8 (char8_t) and UTF-32 (char32_t); only declarations
+// are given in this header.
+class CharacterConverter {
+private:
+ // State pointer handed to the constructor.
+ // NOTE(review): mbstate.h, included above, declares a struct named `mbstate`;
+ // `mbstate_t` is not declared by this header itself. Confirm which type is
+ // intended.
+ mbstate_t *state;
+
+public:
+ // Takes the state pointer this converter will use.
+ CharacterConverter(mbstate_t *mbstate);
+
+ // Presumably reports whether a full character has been consumed; confirm
+ // against the implementation.
+ bool isComplete();
+
+ // Accept one char8_t or one char32_t value. The meaning of the int result is
+ // not specified in this header.
+ int push(char8_t utf8_byte);
+ int push(char32_t utf32);
+
+ // Produce a utf_ret, which pairs the value (`out`) with an int `error` field.
+ utf_ret<char8_t> pop_utf8();
+ utf_ret<char32_t> pop_utf32();
+};
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
new file mode 100644
index 0000000000000..72ec727560003
--- /dev/null
+++ b/libc/src/__support/wchar/mbstate.h
@@ -0,0 +1,27 @@
+//===-- Definition of mbstate-----------------------------------*-- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
+#define LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
+
+#include "hdr/types/char32_t.h"
+#include <stdint.h>
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Plain-data record of three fields. Nothing in this header reads or writes
+// them, so the per-field notes below are inferred from the names only.
+struct mbstate {
+ // Presumably the partially converted character - TODO confirm.
+ char32_t partial;
+ // Presumably how much of `partial` has been handled so far - TODO confirm.
+ // Note the name counts bits, while `total_bytes` counts bytes.
+ uint8_t bits_processed;
+ // Presumably the byte length of the character being converted - TODO confirm.
+ uint8_t total_bytes;
+};
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
diff --git a/libc/src/__support/wchar/utf_ret.h b/libc/src/__support/wchar/utf_ret.h
new file mode 100644
index 0000000000000..b8a8f6f094143
--- /dev/null
+++ b/libc/src/__support/wchar/utf_ret.h
@@ -0,0 +1,21 @@
+//===-- Definition of utf_ret ----------------------------------*-- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_UTF_RET_H
+#define LLVM_LIBC_SRC___SUPPORT_UTF_RET_H
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Pairs a value of type T (`out`) with an int `error` field. Which error
+// values are used is not defined in this header.
+template <typename T> struct utf_ret {
+ T out;
+ int error;
+};
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_UTF_RET_H
More information about the libc-commits
mailing list