[libc-commits] [libc] [libc] Character converter skeleton class (PR #143619)

Uzair Nawaz via libc-commits libc-commits at lists.llvm.org
Wed Jun 11 11:55:51 PDT 2025


https://github.com/uzairnawaz updated https://github.com/llvm/llvm-project/pull/143619

From 4de427f6c44acad9299922ec2844174ee14ddb37 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 10 Jun 2025 22:36:36 +0000
Subject: [PATCH 01/11] character converter skeleton class

---
 libc/src/__support/wchar/CMakeLists.txt       | 24 +++++++++++++++++++
 .../__support/wchar/character_converter.cpp   |  2 ++
 .../src/__support/wchar/character_converter.h | 20 ++++++++++++++++
 libc/src/__support/wchar/mbstate.h            |  8 +++++++
 libc/src/__support/wchar/utf_ret.h            |  6 +++++
 5 files changed, 60 insertions(+)
 create mode 100644 libc/src/__support/wchar/CMakeLists.txt
 create mode 100644 libc/src/__support/wchar/character_converter.cpp
 create mode 100644 libc/src/__support/wchar/character_converter.h
 create mode 100644 libc/src/__support/wchar/mbstate.h
 create mode 100644 libc/src/__support/wchar/utf_ret.h

diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
new file mode 100644
index 0000000000000..c1f402767235f
--- /dev/null
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -0,0 +1,24 @@
+add_header_library(
+  mbstate
+  HDRS
+    mbstate.h
+  DEPENDS
+    libc.hdr.types.wchar_t    
+)
+
+add_header_library(
+  character_converter
+  HDRS
+    character_converter.h
+  DEPENDS
+    libc.hdr.types.wchar_t
+    .mbstate
+    .utf_ret
+)
+
+add_header_library(
+  utf_ret
+  HDRS
+    utf_ret.h
+  DEPENDS
+)
diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
new file mode 100644
index 0000000000000..139597f9cb07c
--- /dev/null
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -0,0 +1,2 @@
+
+
diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h
new file mode 100644
index 0000000000000..1800fe16eb14b
--- /dev/null
+++ b/libc/src/__support/wchar/character_converter.h
@@ -0,0 +1,20 @@
+
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/utf_ret.h"
+#include "hdr/types/wchar_t.h"
+
+class CharacterConverter {
+private:
+    mbstate_t* state;
+
+public:
+    CharacterConverter();
+
+    bool isComplete();
+
+    int push(char utf8_byte);
+    int push(wchar_t utf32);
+
+    utf_ret<char> pop_utf8();
+    utf_ret<wchar_t> pop_utf32();
+};
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
new file mode 100644
index 0000000000000..c0af608c37623
--- /dev/null
+++ b/libc/src/__support/wchar/mbstate.h
@@ -0,0 +1,8 @@
+
+#include "hdr/types/wchar_t.h"
+
+struct mbstate_t {
+    wchar_t partial;
+    unsigned char bits_processed;
+    unsigned char total_bytes;
+};
diff --git a/libc/src/__support/wchar/utf_ret.h b/libc/src/__support/wchar/utf_ret.h
new file mode 100644
index 0000000000000..533f4cb952f4b
--- /dev/null
+++ b/libc/src/__support/wchar/utf_ret.h
@@ -0,0 +1,6 @@
+
+template <typename T>
+struct utf_ret {
+    T out;
+    int error;
+};

From 6245ef18aab8874a93ecdc8ce4a5675f27731ef6 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 10 Jun 2025 22:42:22 +0000
Subject: [PATCH 02/11] fixed formatting

---
 libc/src/__support/wchar/character_converter.cpp |  1 -
 libc/src/__support/wchar/character_converter.h   | 16 ++++++++--------
 libc/src/__support/wchar/mbstate.h               |  6 +++---
 libc/src/__support/wchar/utf_ret.h               |  7 +++----
 4 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index 139597f9cb07c..8b137891791fe 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -1,2 +1 @@
 
-
diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h
index 1800fe16eb14b..b3de5eae926ab 100644
--- a/libc/src/__support/wchar/character_converter.h
+++ b/libc/src/__support/wchar/character_converter.h
@@ -1,20 +1,20 @@
 
+#include "hdr/types/wchar_t.h"
 #include "src/__support/wchar/mbstate.h"
 #include "src/__support/wchar/utf_ret.h"
-#include "hdr/types/wchar_t.h"
 
 class CharacterConverter {
 private:
-    mbstate_t* state;
+  mbstate_t *state;
 
 public:
-    CharacterConverter();
+  CharacterConverter();
 
-    bool isComplete();
+  bool isComplete();
 
-    int push(char utf8_byte);
-    int push(wchar_t utf32);
+  int push(char utf8_byte);
+  int push(wchar_t utf32);
 
-    utf_ret<char> pop_utf8();
-    utf_ret<wchar_t> pop_utf32();
+  utf_ret<char> pop_utf8();
+  utf_ret<wchar_t> pop_utf32();
 };
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
index c0af608c37623..7ab16177ee33f 100644
--- a/libc/src/__support/wchar/mbstate.h
+++ b/libc/src/__support/wchar/mbstate.h
@@ -2,7 +2,7 @@
 #include "hdr/types/wchar_t.h"
 
 struct mbstate_t {
-    wchar_t partial;
-    unsigned char bits_processed;
-    unsigned char total_bytes;
+  wchar_t partial;
+  unsigned char bits_processed;
+  unsigned char total_bytes;
 };
diff --git a/libc/src/__support/wchar/utf_ret.h b/libc/src/__support/wchar/utf_ret.h
index 533f4cb952f4b..eeaf66762a379 100644
--- a/libc/src/__support/wchar/utf_ret.h
+++ b/libc/src/__support/wchar/utf_ret.h
@@ -1,6 +1,5 @@
 
-template <typename T>
-struct utf_ret {
-    T out;
-    int error;
+template <typename T> struct utf_ret {
+  T out;
+  int error;
 };

From 7c9ad0294004764c67d97716f3ff754eeaec8742 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 10 Jun 2025 23:12:13 +0000
Subject: [PATCH 03/11] license and include guards

---
 .../src/__support/wchar/character_converter.h | 16 ++++++++++++++++
 libc/src/__support/wchar/mbstate.h            | 19 ++++++++++++++++++-
 libc/src/__support/wchar/utf_ret.h            | 16 ++++++++++++++++
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h
index b3de5eae926ab..7deca8bf117a4 100644
--- a/libc/src/__support/wchar/character_converter.h
+++ b/libc/src/__support/wchar/character_converter.h
@@ -1,8 +1,20 @@
+//===-- Definition of a class for mbstate_t and conversion -----*-- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
+#define LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
 
 #include "hdr/types/wchar_t.h"
 #include "src/__support/wchar/mbstate.h"
 #include "src/__support/wchar/utf_ret.h"
 
+namespace LIBC_NAMESPACE_DECL {
+
 class CharacterConverter {
 private:
   mbstate_t *state;
@@ -18,3 +30,7 @@ class CharacterConverter {
   utf_ret<char> pop_utf8();
   utf_ret<wchar_t> pop_utf32();
 };
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
index 7ab16177ee33f..26256cf9ed30b 100644
--- a/libc/src/__support/wchar/mbstate.h
+++ b/libc/src/__support/wchar/mbstate.h
@@ -1,8 +1,25 @@
+//===-- Definition of mbstate_t -------------------------- -----*-- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
+#define LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
 
 #include "hdr/types/wchar_t.h"
 
+namespace LIBC_NAMESPACE_DECL {
+
 struct mbstate_t {
   wchar_t partial;
   unsigned char bits_processed;
   unsigned char total_bytes;
-};
+}; 
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
+
diff --git a/libc/src/__support/wchar/utf_ret.h b/libc/src/__support/wchar/utf_ret.h
index eeaf66762a379..b8a8f6f094143 100644
--- a/libc/src/__support/wchar/utf_ret.h
+++ b/libc/src/__support/wchar/utf_ret.h
@@ -1,5 +1,21 @@
+//===-- Definition of utf_ret ----------------------------------*-- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_UTF_RET_H
+#define LLVM_LIBC_SRC___SUPPORT_UTF_RET_H
+
+namespace LIBC_NAMESPACE_DECL {
 
 template <typename T> struct utf_ret {
   T out;
   int error;
 };
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_UTF_RET_H

From 9ac9af3f9526987ead4d85612a61c5d43da09403 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 11 Jun 2025 16:36:19 +0000
Subject: [PATCH 04/11] switched to fixed width types; added internal namespace

---
 libc/hdr/types/char32_t.h                     | 23 +++++++
 libc/hdr/types/char8_t.h                      | 23 +++++++
 libc/hdr/uchar_overlay.h                      | 69 +++++++++++++++++++
 .../__support/wchar/character_converter.cpp   | 26 +++++++
 .../src/__support/wchar/character_converter.h | 14 ++--
 libc/src/__support/wchar/mbstate.h            | 12 ++--
 6 files changed, 157 insertions(+), 10 deletions(-)
 create mode 100644 libc/hdr/types/char32_t.h
 create mode 100644 libc/hdr/types/char8_t.h
 create mode 100644 libc/hdr/uchar_overlay.h

diff --git a/libc/hdr/types/char32_t.h b/libc/hdr/types/char32_t.h
new file mode 100644
index 0000000000000..e68a931e24aa5
--- /dev/null
+++ b/libc/hdr/types/char32_t.h
@@ -0,0 +1,23 @@
+//===-- Definition of char32_t.h ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_CHAR32_t_H
+#define LLVM_LIBC_HDR_TYPES_CHAR32_t_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/char32_t.h"
+
+#else // overlay mode
+
+#include "hdr/uchar_overlay.h"
+
+#endif // LLVM_LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_char32_t_H
diff --git a/libc/hdr/types/char8_t.h b/libc/hdr/types/char8_t.h
new file mode 100644
index 0000000000000..8d801ff0438a9
--- /dev/null
+++ b/libc/hdr/types/char8_t.h
@@ -0,0 +1,23 @@
+//===-- Definition of char8_t.h -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_CHAR8_T_H
+#define LLVM_LIBC_HDR_TYPES_CHAR8_T_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/char8_t.h"
+
+#else // overlay mode
+
+#include "hdr/uchar_overlay.h"
+
+#endif // LLVM_LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_char8_t_H
diff --git a/libc/hdr/uchar_overlay.h b/libc/hdr/uchar_overlay.h
new file mode 100644
index 0000000000000..44ed3d48c6c1d
--- /dev/null
+++ b/libc/hdr/uchar_overlay.h
@@ -0,0 +1,69 @@
+//===-- Including uchar.h in overlay mode ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_UCHAR_OVERLAY_H
+#define LLVM_LIBC_HDR_UCHAR_OVERLAY_H
+
+#ifdef LIBC_FULL_BUILD
+#error "This header should only be included in overlay mode"
+#endif
+
+// Overlay mode
+
+// glibc <uchar.h> header might provide extern inline definitions for few
+// functions, causing external alias errors.  They are guarded by
+// `__USE_EXTERN_INLINES` macro.  We temporarily disable `__USE_EXTERN_INLINES`
+// macro by defining `__NO_INLINE__` before including <uchar.h>.
+// And the same with `__USE_FORTIFY_LEVEL`, which will be temporarily disabled
+// with `_FORTIFY_SOURCE`.
+
+#ifdef _FORTIFY_SOURCE
+#define LIBC_OLD_FORTIFY_SOURCE _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+
+#ifndef __NO_INLINE__
+#define __NO_INLINE__ 1
+#define LIBC_SET_NO_INLINE
+#endif
+
+#ifdef __USE_EXTERN_INLINES
+#define LIBC_OLD_USE_EXTERN_INLINES
+#undef __USE_EXTERN_INLINES
+#endif
+
+#ifdef __USE_FORTIFY_LEVEL
+#define LIBC_OLD_USE_FORTIFY_LEVEL __USE_FORTIFY_LEVEL
+#undef __USE_FORTIFY_LEVEL
+#define __USE_FORTIFY_LEVEL 0
+#endif
+
+#include <uchar.h>
+
+#ifdef LIBC_OLD_FORTIFY_SOURCE
+#define _FORTIFY_SOURCE LIBC_OLD_FORTIFY_SOURCE
+#undef LIBC_OLD_FORTIFY_SOURCE
+#endif
+
+#ifdef LIBC_SET_NO_INLINE
+#undef __NO_INLINE__
+#undef LIBC_SET_NO_INLINE
+#endif
+
+#ifdef LIBC_OLD_USE_FORTIFY_LEVEL
+#undef __USE_FORTIFY_LEVEL
+#define __USE_FORTIFY_LEVEL LIBC_OLD_USE_FORTIFY_LEVEL
+#undef LIBC_OLD_USE_FORTIFY_LEVEL
+#endif
+
+#ifdef LIBC_OLD_USE_EXTERN_INLINES
+#define __USE_EXTERN_INLINES
+#undef LIBC_OLD_USE_EXTERN_INLINES
+#endif
+
+#endif // LLVM_LIBC_HDR_UCHAR_OVERLAY_H
diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index 8b137891791fe..63fa9ae40385c 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -1 +1,27 @@
+//===-- Implementation of a class for conversion ---------------*-- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
 
+#include "character_converter.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+CharacterConverter::CharacterConverter(mbstate_t *mbstate) {}
+
+bool CharacterConverter::isComplete() {}
+
+int CharacterConverter::push(char8_t utf8_byte) {}
+
+int CharacterConverter::push(char32_t utf32) {}
+
+utf_ret<char8_t> CharacterConverter::pop_utf8() {}
+
+utf_ret<char32_t> CharacterConverter::pop_utf32() {}
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h
index 7deca8bf117a4..a9a6d444aa8a0 100644
--- a/libc/src/__support/wchar/character_converter.h
+++ b/libc/src/__support/wchar/character_converter.h
@@ -9,28 +9,32 @@
 #ifndef LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
 #define LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
 
+#include "hdr/types/char32_t.h"
+#include "hdr/types/char8_t.h"
 #include "hdr/types/wchar_t.h"
 #include "src/__support/wchar/mbstate.h"
 #include "src/__support/wchar/utf_ret.h"
 
 namespace LIBC_NAMESPACE_DECL {
+namespace internal {
 
 class CharacterConverter {
 private:
   mbstate_t *state;
 
 public:
-  CharacterConverter();
+  CharacterConverter(mbstate_t *mbstate);
 
   bool isComplete();
 
-  int push(char utf8_byte);
-  int push(wchar_t utf32);
+  int push(char8_t utf8_byte);
+  int push(char32_t utf32);
 
-  utf_ret<char> pop_utf8();
-  utf_ret<wchar_t> pop_utf32();
+  utf_ret<char8_t> pop_utf8();
+  utf_ret<char32_t> pop_utf32();
 };
 
+} // namespace internal
 } // namespace LIBC_NAMESPACE_DECL
 
 #endif // LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
index 26256cf9ed30b..15cdb7980485c 100644
--- a/libc/src/__support/wchar/mbstate.h
+++ b/libc/src/__support/wchar/mbstate.h
@@ -10,16 +10,18 @@
 #define LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
 
 #include "hdr/types/wchar_t.h"
+#include "stdint.h"
 
 namespace LIBC_NAMESPACE_DECL {
+namespace internal {
 
-struct mbstate_t {
+struct mbstate {
   wchar_t partial;
-  unsigned char bits_processed;
-  unsigned char total_bytes;
-}; 
+  uint8_t bits_processed;
+  uint8_t total_bytes;
+};
 
+} // namespace internal
 } // namespace LIBC_NAMESPACE_DECL
 
 #endif // LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
-

From f646d43bf308906739cea0b8eec0ff06679b9fe1 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 11 Jun 2025 16:51:27 +0000
Subject: [PATCH 05/11] fixed build dependencies + formatting

---
 libc/hdr/types/char32_t.h               | 4 ++--
 libc/src/__support/wchar/CMakeLists.txt | 4 ++++
 libc/src/__support/wchar/mbstate.h      | 7 ++++---
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/libc/hdr/types/char32_t.h b/libc/hdr/types/char32_t.h
index e68a931e24aa5..a2a2df36dc196 100644
--- a/libc/hdr/types/char32_t.h
+++ b/libc/hdr/types/char32_t.h
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIBC_HDR_TYPES_CHAR32_t_H
-#define LLVM_LIBC_HDR_TYPES_CHAR32_t_H
+#ifndef LLVM_LIBC_HDR_TYPES_CHAR32_T_H
+#define LLVM_LIBC_HDR_TYPES_CHAR32_T_H
 
 #ifdef LIBC_FULL_BUILD
 
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index c1f402767235f..aebe8c650e0ce 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -4,6 +4,8 @@ add_header_library(
     mbstate.h
   DEPENDS
     libc.hdr.types.wchar_t    
+    libc.hdr.types.char32_t    
+    stdint.h
 )
 
 add_header_library(
@@ -12,6 +14,8 @@ add_header_library(
     character_converter.h
   DEPENDS
     libc.hdr.types.wchar_t
+    libc.hdr.types.char8_t
+    libc.hdr.types.char32_t
     .mbstate
     .utf_ret
 )
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
index 15cdb7980485c..b5e264a9129c4 100644
--- a/libc/src/__support/wchar/mbstate.h
+++ b/libc/src/__support/wchar/mbstate.h
@@ -1,4 +1,4 @@
-//===-- Definition of mbstate_t -------------------------- -----*-- C++ -*-===//
+//===-- Definition of mbstate----------------------------- -----*-- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -9,14 +9,15 @@
 #ifndef LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
 #define LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
 
+#include "hdr/types/char32_t.h"
 #include "hdr/types/wchar_t.h"
-#include "stdint.h"
+#include <stdint.h>
 
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
 struct mbstate {
-  wchar_t partial;
+  char32_t partial;
   uint8_t bits_processed;
   uint8_t total_bytes;
 };

From 651f1688ed1db52b55f6b8a87a7754638b1304ae Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 11 Jun 2025 16:53:47 +0000
Subject: [PATCH 06/11] fixed formatting + added constructor

---
 libc/hdr/types/char32_t.h                        | 2 +-
 libc/src/__support/wchar/character_converter.cpp | 2 +-
 libc/src/__support/wchar/mbstate.h               | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libc/hdr/types/char32_t.h b/libc/hdr/types/char32_t.h
index a2a2df36dc196..91181dff1425c 100644
--- a/libc/hdr/types/char32_t.h
+++ b/libc/hdr/types/char32_t.h
@@ -20,4 +20,4 @@
 
 #endif // LLVM_LIBC_FULL_BUILD
 
-#endif // LLVM_LIBC_HDR_TYPES_char32_t_H
+#endif // LLVM_LIBC_HDR_TYPES_char32_T_H
diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index 63fa9ae40385c..128b82c6de781 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -11,7 +11,7 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
-CharacterConverter::CharacterConverter(mbstate_t *mbstate) {}
+CharacterConverter::CharacterConverter(mbstate_t *mbstate) { state = mbstate; }
 
 bool CharacterConverter::isComplete() {}
 
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
index b5e264a9129c4..0c6e5e18799b8 100644
--- a/libc/src/__support/wchar/mbstate.h
+++ b/libc/src/__support/wchar/mbstate.h
@@ -1,4 +1,4 @@
-//===-- Definition of mbstate----------------------------- -----*-- C++ -*-===//
+//===-- Definition of mbstate-----------------------------------*-- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

From 4ec2de7ef20e539d06f83bc6e96508d1bd94dc51 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 11 Jun 2025 17:24:26 +0000
Subject: [PATCH 07/11] formatting fixes; remove unnecessary dependencies

---
 libc/hdr/types/char32_t.h                        | 3 +--
 libc/hdr/types/char8_t.h                         | 3 +--
 libc/src/__support/wchar/CMakeLists.txt          | 9 ---------
 libc/src/__support/wchar/character_converter.cpp | 2 +-
 libc/src/__support/wchar/character_converter.h   | 1 -
 libc/src/__support/wchar/mbstate.h               | 1 -
 6 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/libc/hdr/types/char32_t.h b/libc/hdr/types/char32_t.h
index 91181dff1425c..94fe5747d3415 100644
--- a/libc/hdr/types/char32_t.h
+++ b/libc/hdr/types/char32_t.h
@@ -4,7 +4,6 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIBC_HDR_TYPES_CHAR32_T_H
@@ -20,4 +19,4 @@
 
 #endif // LLVM_LIBC_FULL_BUILD
 
-#endif // LLVM_LIBC_HDR_TYPES_char32_T_H
+#endif // LLVM_LIBC_HDR_TYPES_CHAR32_T_H
diff --git a/libc/hdr/types/char8_t.h b/libc/hdr/types/char8_t.h
index 8d801ff0438a9..2e765dc9de56b 100644
--- a/libc/hdr/types/char8_t.h
+++ b/libc/hdr/types/char8_t.h
@@ -4,7 +4,6 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
-//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIBC_HDR_TYPES_CHAR8_T_H
@@ -20,4 +19,4 @@
 
 #endif // LLVM_LIBC_FULL_BUILD
 
-#endif // LLVM_LIBC_HDR_TYPES_char8_t_H
+#endif // LLVM_LIBC_HDR_TYPES_CHAR8_t_H
diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index aebe8c650e0ce..c7747729a555b 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -3,9 +3,7 @@ add_header_library(
   HDRS
     mbstate.h
   DEPENDS
-    libc.hdr.types.wchar_t    
     libc.hdr.types.char32_t    
-    stdint.h
 )
 
 add_header_library(
@@ -13,16 +11,9 @@ add_header_library(
   HDRS
     character_converter.h
   DEPENDS
-    libc.hdr.types.wchar_t
     libc.hdr.types.char8_t
     libc.hdr.types.char32_t
     .mbstate
     .utf_ret
 )
 
-add_header_library(
-  utf_ret
-  HDRS
-    utf_ret.h
-  DEPENDS
-)
diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index 128b82c6de781..01a5043a46b25 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -1,4 +1,4 @@
-//===-- Implementation of a class for conversion ---------------*-- C++ -*-===//
+//===-- Implementation of a class for conversion --------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h
index a9a6d444aa8a0..a6bac43805376 100644
--- a/libc/src/__support/wchar/character_converter.h
+++ b/libc/src/__support/wchar/character_converter.h
@@ -11,7 +11,6 @@
 
 #include "hdr/types/char32_t.h"
 #include "hdr/types/char8_t.h"
-#include "hdr/types/wchar_t.h"
 #include "src/__support/wchar/mbstate.h"
 #include "src/__support/wchar/utf_ret.h"
 
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
index 0c6e5e18799b8..72ec727560003 100644
--- a/libc/src/__support/wchar/mbstate.h
+++ b/libc/src/__support/wchar/mbstate.h
@@ -10,7 +10,6 @@
 #define LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
 
 #include "hdr/types/char32_t.h"
-#include "hdr/types/wchar_t.h"
 #include <stdint.h>
 
 namespace LIBC_NAMESPACE_DECL {

From c145ff838d33f3c618dae40607c7bbcce6153a96 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 11 Jun 2025 17:27:06 +0000
Subject: [PATCH 08/11] capitalization fix

---
 libc/hdr/types/char8_t.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/hdr/types/char8_t.h b/libc/hdr/types/char8_t.h
index 2e765dc9de56b..31de764658f9e 100644
--- a/libc/hdr/types/char8_t.h
+++ b/libc/hdr/types/char8_t.h
@@ -19,4 +19,4 @@
 
 #endif // LLVM_LIBC_FULL_BUILD
 
-#endif // LLVM_LIBC_HDR_TYPES_CHAR8_t_H
+#endif // LLVM_LIBC_HDR_TYPES_CHAR8_T_H

From e46c1a7d7b57848e4770a9579d28ce4ab83972bc Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 11 Jun 2025 17:38:45 +0000
Subject: [PATCH 09/11] object library for characterconverter class

---
 libc/src/__support/wchar/CMakeLists.txt | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
index c7747729a555b..5cca58400ff45 100644
--- a/libc/src/__support/wchar/CMakeLists.txt
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -6,10 +6,12 @@ add_header_library(
     libc.hdr.types.char32_t    
 )
 
-add_header_library(
+add_object_library(
   character_converter
   HDRS
     character_converter.h
+  SRCS 
+    character_converter.cpp
   DEPENDS
     libc.hdr.types.char8_t
     libc.hdr.types.char32_t
@@ -17,3 +19,8 @@ add_header_library(
     .utf_ret
 )
 
+add_header_library(
+  utf_ret
+  HDRS
+    utf_ret.h
+)

From f2163462129455209d073156520c861eb26b8515 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 11 Jun 2025 18:03:40 +0000
Subject: [PATCH 10/11] added includes to cpp file

---
 libc/src/__support/wchar/character_converter.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index 01a5043a46b25..750462c422313 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -6,8 +6,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "hdr/types/char32_t.h"
+#include "hdr/types/char8_t.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/utf_ret.h"
+
 #include "character_converter.h"
 
+
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 

From 0de4caad8842c9269a8ae22d14e66bc746f28c18 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 11 Jun 2025 18:55:20 +0000
Subject: [PATCH 11/11] formatting

---
 libc/src/__support/wchar/character_converter.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index 750462c422313..0afc2a6f59e64 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -13,7 +13,6 @@
 
 #include "character_converter.h"
 
-
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 



More information about the libc-commits mailing list