[libc-commits] [libc] [libc] Character converter skeleton class (PR #143619)

Uzair Nawaz via libc-commits libc-commits at lists.llvm.org
Tue Jun 10 16:12:41 PDT 2025


https://github.com/uzairnawaz updated https://github.com/llvm/llvm-project/pull/143619

>From 4de427f6c44acad9299922ec2844174ee14ddb37 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 10 Jun 2025 22:36:36 +0000
Subject: [PATCH 1/3] character converter skeleton class

---
 libc/src/__support/wchar/CMakeLists.txt       | 24 +++++++++++++++++++
 .../__support/wchar/character_converter.cpp   |  2 ++
 .../src/__support/wchar/character_converter.h | 20 ++++++++++++++++
 libc/src/__support/wchar/mbstate.h            |  8 +++++++
 libc/src/__support/wchar/utf_ret.h            |  6 +++++
 5 files changed, 60 insertions(+)
 create mode 100644 libc/src/__support/wchar/CMakeLists.txt
 create mode 100644 libc/src/__support/wchar/character_converter.cpp
 create mode 100644 libc/src/__support/wchar/character_converter.h
 create mode 100644 libc/src/__support/wchar/mbstate.h
 create mode 100644 libc/src/__support/wchar/utf_ret.h

diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt
new file mode 100644
index 0000000000000..c1f402767235f
--- /dev/null
+++ b/libc/src/__support/wchar/CMakeLists.txt
@@ -0,0 +1,24 @@
+add_header_library(
+  mbstate
+  HDRS
+    mbstate.h
+  DEPENDS
+    libc.hdr.types.wchar_t    
+)
+
+add_header_library(
+  character_converter
+  HDRS
+    character_converter.h
+  DEPENDS
+    libc.hdr.types.wchar_t
+    .mbstate
+    .utf_ret
+)
+
+add_header_library(
+  utf_ret
+  HDRS
+    utf_ret.h
+  DEPENDS
+)
diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
new file mode 100644
index 0000000000000..139597f9cb07c
--- /dev/null
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -0,0 +1,2 @@
+
+
diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h
new file mode 100644
index 0000000000000..1800fe16eb14b
--- /dev/null
+++ b/libc/src/__support/wchar/character_converter.h
@@ -0,0 +1,20 @@
+
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/utf_ret.h"
+#include "hdr/types/wchar_t.h"
+
+class CharacterConverter {
+private:
+    mbstate_t* state;
+
+public:
+    CharacterConverter();
+
+    bool isComplete();
+
+    int push(char utf8_byte);
+    int push(wchar_t utf32);
+
+    utf_ret<char> pop_utf8();
+    utf_ret<wchar_t> pop_utf32();
+};
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
new file mode 100644
index 0000000000000..c0af608c37623
--- /dev/null
+++ b/libc/src/__support/wchar/mbstate.h
@@ -0,0 +1,8 @@
+
+#include "hdr/types/wchar_t.h"
+
+struct mbstate_t {
+    wchar_t partial;
+    unsigned char bits_processed;
+    unsigned char total_bytes;
+};
diff --git a/libc/src/__support/wchar/utf_ret.h b/libc/src/__support/wchar/utf_ret.h
new file mode 100644
index 0000000000000..533f4cb952f4b
--- /dev/null
+++ b/libc/src/__support/wchar/utf_ret.h
@@ -0,0 +1,6 @@
+
+template <typename T>
+struct utf_ret {
+    T out;
+    int error;
+};

>From 6245ef18aab8874a93ecdc8ce4a5675f27731ef6 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 10 Jun 2025 22:42:22 +0000
Subject: [PATCH 2/3] fixed formatting

---
 libc/src/__support/wchar/character_converter.cpp |  1 -
 libc/src/__support/wchar/character_converter.h   | 16 ++++++++--------
 libc/src/__support/wchar/mbstate.h               |  6 +++---
 libc/src/__support/wchar/utf_ret.h               |  7 +++----
 4 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp
index 139597f9cb07c..8b137891791fe 100644
--- a/libc/src/__support/wchar/character_converter.cpp
+++ b/libc/src/__support/wchar/character_converter.cpp
@@ -1,2 +1 @@
 
-
diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h
index 1800fe16eb14b..b3de5eae926ab 100644
--- a/libc/src/__support/wchar/character_converter.h
+++ b/libc/src/__support/wchar/character_converter.h
@@ -1,20 +1,20 @@
 
+#include "hdr/types/wchar_t.h"
 #include "src/__support/wchar/mbstate.h"
 #include "src/__support/wchar/utf_ret.h"
-#include "hdr/types/wchar_t.h"
 
 class CharacterConverter {
 private:
-    mbstate_t* state;
+  mbstate_t *state;
 
 public:
-    CharacterConverter();
+  CharacterConverter();
 
-    bool isComplete();
+  bool isComplete();
 
-    int push(char utf8_byte);
-    int push(wchar_t utf32);
+  int push(char utf8_byte);
+  int push(wchar_t utf32);
 
-    utf_ret<char> pop_utf8();
-    utf_ret<wchar_t> pop_utf32();
+  utf_ret<char> pop_utf8();
+  utf_ret<wchar_t> pop_utf32();
 };
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
index c0af608c37623..7ab16177ee33f 100644
--- a/libc/src/__support/wchar/mbstate.h
+++ b/libc/src/__support/wchar/mbstate.h
@@ -2,7 +2,7 @@
 #include "hdr/types/wchar_t.h"
 
 struct mbstate_t {
-    wchar_t partial;
-    unsigned char bits_processed;
-    unsigned char total_bytes;
+  wchar_t partial;
+  unsigned char bits_processed;
+  unsigned char total_bytes;
 };
diff --git a/libc/src/__support/wchar/utf_ret.h b/libc/src/__support/wchar/utf_ret.h
index 533f4cb952f4b..eeaf66762a379 100644
--- a/libc/src/__support/wchar/utf_ret.h
+++ b/libc/src/__support/wchar/utf_ret.h
@@ -1,6 +1,5 @@
 
-template <typename T>
-struct utf_ret {
-    T out;
-    int error;
+template <typename T> struct utf_ret {
+  T out;
+  int error;
 };

>From 7c9ad0294004764c67d97716f3ff754eeaec8742 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Tue, 10 Jun 2025 23:12:13 +0000
Subject: [PATCH 3/3] license and include guards

---
 .../src/__support/wchar/character_converter.h | 16 ++++++++++++++++
 libc/src/__support/wchar/mbstate.h            | 19 ++++++++++++++++++-
 libc/src/__support/wchar/utf_ret.h            | 16 ++++++++++++++++
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h
index b3de5eae926ab..7deca8bf117a4 100644
--- a/libc/src/__support/wchar/character_converter.h
+++ b/libc/src/__support/wchar/character_converter.h
@@ -1,8 +1,20 @@
+//===-- Definition of a class for mbstate_t and conversion -----*-- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
+#define LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
 
 #include "hdr/types/wchar_t.h"
 #include "src/__support/wchar/mbstate.h"
 #include "src/__support/wchar/utf_ret.h"
 
+namespace LIBC_NAMESPACE_DECL {
+
 class CharacterConverter {
 private:
   mbstate_t *state;
@@ -18,3 +30,7 @@ class CharacterConverter {
   utf_ret<char> pop_utf8();
   utf_ret<wchar_t> pop_utf32();
 };
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_CHARACTER_CONVERTER_H
diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h
index 7ab16177ee33f..26256cf9ed30b 100644
--- a/libc/src/__support/wchar/mbstate.h
+++ b/libc/src/__support/wchar/mbstate.h
@@ -1,8 +1,25 @@
+//===-- Definition of mbstate_t --------------------------------*-- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
+#define LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
 
 #include "hdr/types/wchar_t.h"
 
+namespace LIBC_NAMESPACE_DECL {
+
 struct mbstate_t {
   wchar_t partial;
   unsigned char bits_processed;
   unsigned char total_bytes;
-};
+}; 
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MBSTATE_H
+
diff --git a/libc/src/__support/wchar/utf_ret.h b/libc/src/__support/wchar/utf_ret.h
index eeaf66762a379..b8a8f6f094143 100644
--- a/libc/src/__support/wchar/utf_ret.h
+++ b/libc/src/__support/wchar/utf_ret.h
@@ -1,5 +1,21 @@
+//===-- Definition of utf_ret ----------------------------------*-- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_UTF_RET_H
+#define LLVM_LIBC_SRC___SUPPORT_UTF_RET_H
+
+namespace LIBC_NAMESPACE_DECL {
 
 template <typename T> struct utf_ret {
   T out;
   int error;
 };
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_UTF_RET_H



More information about the libc-commits mailing list