[libc-commits] [libc] Add basic char*_t support for libc (partial WG14 N2653) (PR #90360)

Fabian Keßler via libc-commits libc-commits at lists.llvm.org
Mon Apr 29 14:18:33 PDT 2024


https://github.com/Febbe updated https://github.com/llvm/llvm-project/pull/90360

>From 1a3366e4639f0fe8c7d15e74038b521e410aeb52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabian=20Ke=C3=9Fler?= <fabian_kessler at gmx.de>
Date: Sat, 27 Apr 2024 23:21:01 +0200
Subject: [PATCH 1/3] Add basic char*_t support for libc  - Define C23 char8_t 
 - Define C11 char16_t  - Define C11 char32_t

Preparation for functions like `mbrtoc8` and `c8rtomb`, which are
introduced in C23.
---
 libc/config/baremetal/api.td                |  7 ++++++-
 libc/config/linux/x86_64/headers.txt        |  1 +
 libc/include/CMakeLists.txt                 |  3 +++
 libc/include/llvm-libc-types/CMakeLists.txt |  3 +++
 libc/include/llvm-libc-types/char16_t.h     | 17 +++++++++++++++++
 libc/include/llvm-libc-types/char32_t.h     | 17 +++++++++++++++++
 libc/include/llvm-libc-types/char8_t.h      | 17 +++++++++++++++++
 libc/spec/spec.td                           |  3 +++
 libc/spec/stdc.td                           |  3 +++
 9 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 libc/include/llvm-libc-types/char16_t.h
 create mode 100644 libc/include/llvm-libc-types/char32_t.h
 create mode 100644 libc/include/llvm-libc-types/char8_t.h

diff --git a/libc/config/baremetal/api.td b/libc/config/baremetal/api.td
index 25aa06aacb642e..a6547d843c85ee 100644
--- a/libc/config/baremetal/api.td
+++ b/libc/config/baremetal/api.td
@@ -85,5 +85,10 @@ def TimeAPI : PublicAPI<"time.h"> {
 }
 
 def UCharAPI : PublicAPI<"uchar.h"> {
-  let Types = ["mbstate_t"];
+  let Types = [
+    "mbstate_t",
+    "char8_t",
+    "char16_t",
+    "char32_t",
+  ];
 }
diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt
index e51c7931942706..44d640b75e2bf7 100644
--- a/libc/config/linux/x86_64/headers.txt
+++ b/libc/config/linux/x86_64/headers.txt
@@ -29,6 +29,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.time
     libc.include.unistd
     libc.include.wchar
+    libc.include.uchar
 
     libc.include.arpa_inet
 
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index aeef46aabfce5c..6dea8e539969d0 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -603,6 +603,9 @@ add_gen_header(
   DEPENDS
     .llvm_libc_common_h
     .llvm-libc-types.mbstate_t
+    .llvm-libc-types.char8_t
+    .llvm-libc-types.char16_t
+    .llvm-libc-types.char32_t
 )
 
 add_gen_header(
diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index 310374fb62ffe0..c8999f3d25f4cd 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -90,6 +90,9 @@ add_header(tcflag_t HDR tcflag_t.h)
 add_header(struct_termios HDR struct_termios.h DEPENDS .cc_t .speed_t .tcflag_t)
 add_header(__getoptargv_t HDR __getoptargv_t.h)
 add_header(wchar_t HDR wchar_t.h)
+add_header(char8_t HDR char8_t.h)
+add_header(char16_t HDR char16_t.h)
+add_header(char32_t HDR char32_t.h)
 add_header(wint_t HDR wint_t.h)
 add_header(sa_family_t HDR sa_family_t.h)
 add_header(socklen_t HDR socklen_t.h)
diff --git a/libc/include/llvm-libc-types/char16_t.h b/libc/include/llvm-libc-types/char16_t.h
new file mode 100644
index 00000000000000..96389917e11a25
--- /dev/null
+++ b/libc/include/llvm-libc-types/char16_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of clock_t type ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_CHAR8_T_H
+#define LLVM_LIBC_TYPES_CHAR8_T_H
+
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+#include <stdint.h>
+typedef uint_least16_t char16_t;
+#endif
+
+#endif // LLVM_LIBC_TYPES_CHAR8_T_H
diff --git a/libc/include/llvm-libc-types/char32_t.h b/libc/include/llvm-libc-types/char32_t.h
new file mode 100644
index 00000000000000..8e578dbbb51262
--- /dev/null
+++ b/libc/include/llvm-libc-types/char32_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of clock_t type ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_CHAR8_T_H
+#define LLVM_LIBC_TYPES_CHAR8_T_H
+
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+#include <stdint.h>
+typedef uint_least32_t char32_t;
+#endif
+
+#endif // LLVM_LIBC_TYPES_CHAR8_T_H
diff --git a/libc/include/llvm-libc-types/char8_t.h b/libc/include/llvm-libc-types/char8_t.h
new file mode 100644
index 00000000000000..c55c723f27693f
--- /dev/null
+++ b/libc/include/llvm-libc-types/char8_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of clock_t type ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_CHAR8_T_H
+#define LLVM_LIBC_TYPES_CHAR8_T_H
+
+#if !defined(__cplusplus) && defined(__STDC_VERSION__) &&                      \
+    __STDC_VERSION__ >= 202311L
+typedef unsigned char char8_t;
+#endif
+
+#endif // LLVM_LIBC_TYPES_CHAR8_T_H
diff --git a/libc/spec/spec.td b/libc/spec/spec.td
index 87bf4435e16724..ea8fa4cd373cf3 100644
--- a/libc/spec/spec.td
+++ b/libc/spec/spec.td
@@ -65,6 +65,9 @@ def SizeTType : NamedType<"size_t">;
 def SizeTPtr : PtrType<SizeTType>;
 def RestrictedSizeTPtr : RestrictedPtrType<SizeTType>;
 
+def Char8TType : NamedType<"char8_t">;
+def Char16TType : NamedType<"char16_t">;
+def Char32TType : NamedType<"char32_t">;
 def WCharType : NamedType<"wchar_t">;
 def WIntType : NamedType<"wint_t">;
 
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 01aa7c70b3b9df..88758dec643fd4 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -1396,6 +1396,9 @@ def StdC : StandardSpec<"stdc"> {
       [], // Macros
       [ //Types
         MBStateTType,
+        Char8TType,
+        Char16TType,
+        Char32TType,
       ],
       [], // Enumerations
       []

>From 5e96b861ee13d46b33d9e8b58273a415a1a503b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabian=20Ke=C3=9Fler?= <fabian_kessler at gmx.de>
Date: Sun, 28 Apr 2024 20:08:52 +0200
Subject: [PATCH 2/3] check char*_t in the c23 status rst

---
 libc/docs/c23.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/docs/c23.rst b/libc/docs/c23.rst
index 44724fe1660cbe..8ccfd46271797b 100644
--- a/libc/docs/c23.rst
+++ b/libc/docs/c23.rst
@@ -158,4 +158,4 @@ Additions:
 
   * mbrtoc8
   * c8rtomb
-  * char*_t
+  * char*_t |check|

>From 40e9779077438cc391214df17bc41089d974bc32 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabian=20Ke=C3=9Fler?= <fabian_kessler at gmx.de>
Date: Mon, 29 Apr 2024 23:14:15 +0200
Subject: [PATCH 3/3] Applied feedback  - headers fixed  - included
 `stdint-macros.h` instead of `stdint.h`  - Updated dependencies of `char16_t`
 and `char32_t`  - Added uchar support for linux-riscv  - Added uchar & wchar
 support for linux-arm & linux-aarch64  - Added UCharAPI type to linux/api.td

---
 libc/config/linux/aarch64/headers.txt       |  2 ++
 libc/config/linux/api.td                    |  9 +++++++++
 libc/config/linux/arm/headers.txt           |  2 ++
 libc/config/linux/riscv/headers.txt         |  1 +
 libc/include/llvm-libc-types/CMakeLists.txt | 14 ++++++++++++--
 libc/include/llvm-libc-types/char16_t.h     | 11 ++++++-----
 libc/include/llvm-libc-types/char32_t.h     | 11 ++++++-----
 libc/include/llvm-libc-types/char8_t.h      |  2 +-
 8 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/libc/config/linux/aarch64/headers.txt b/libc/config/linux/aarch64/headers.txt
index 47db4434b09b33..7d25877cefcc83 100644
--- a/libc/config/linux/aarch64/headers.txt
+++ b/libc/config/linux/aarch64/headers.txt
@@ -25,6 +25,8 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.threads
     libc.include.time
     libc.include.unistd
+    libc.include.wchar
+    libc.include.uchar
 
     libc.include.sys_ioctl
     # Disabled due to epoll_wait syscalls not being available on this platform.
diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 7843513c4d27bb..902839b3e5b8fe 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -206,6 +206,15 @@ def WCharAPI : PublicAPI<"wchar.h"> {
   ];
 }
 
+def UCharAPI : PublicAPI<"uchar.h"> {
+  let Types = [
+    "mbstate_t",
+    "char8_t",
+    "char16_t",
+    "char32_t",
+  ];
+}
+
 def SysRandomAPI : PublicAPI<"sys/random.h"> {
   let Types = ["size_t", "ssize_t"];
 }
diff --git a/libc/config/linux/arm/headers.txt b/libc/config/linux/arm/headers.txt
index 307bb6b146a4cc..1180564fe458c8 100644
--- a/libc/config/linux/arm/headers.txt
+++ b/libc/config/linux/arm/headers.txt
@@ -12,6 +12,8 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.string
     libc.include.strings
     libc.include.search
+    libc.include.wchar
+    libc.include.uchar
 
     # Disabled due to epoll_wait syscalls not being available on this platform.
     # libc.include.sys_epoll
diff --git a/libc/config/linux/riscv/headers.txt b/libc/config/linux/riscv/headers.txt
index c858bcc978d9da..da203e9850603f 100644
--- a/libc/config/linux/riscv/headers.txt
+++ b/libc/config/linux/riscv/headers.txt
@@ -28,6 +28,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.time
     libc.include.unistd
     libc.include.wchar
+    libc.include.uchar
 
     libc.include.arpa_inet
 
diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index c8999f3d25f4cd..df0df249637b6d 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -91,8 +91,18 @@ add_header(struct_termios HDR struct_termios.h DEPENDS .cc_t .speed_t .tcflag_t)
 add_header(__getoptargv_t HDR __getoptargv_t.h)
 add_header(wchar_t HDR wchar_t.h)
 add_header(char8_t HDR char8_t.h)
-add_header(char16_t HDR char16_t.h)
-add_header(char32_t HDR char32_t.h)
+add_header(
+    char16_t 
+  HDR 
+    char16_t.h
+  DEPENDS
+    libc.include.llvm-libc-macros.stdint_macros)
+add_header(
+    char32_t 
+  HDR 
+    char32_t.h
+  DEPENDS
+    libc.include.llvm-libc-macros.stdint_macros)
 add_header(wint_t HDR wint_t.h)
 add_header(sa_family_t HDR sa_family_t.h)
 add_header(socklen_t HDR socklen_t.h)
diff --git a/libc/include/llvm-libc-types/char16_t.h b/libc/include/llvm-libc-types/char16_t.h
index 96389917e11a25..14eb5b34829108 100644
--- a/libc/include/llvm-libc-types/char16_t.h
+++ b/libc/include/llvm-libc-types/char16_t.h
@@ -1,4 +1,4 @@
-//===-- Definition of clock_t type ----------------------------------------===//
+//===-- Definition of char16_t type ---------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,12 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIBC_TYPES_CHAR8_T_H
-#define LLVM_LIBC_TYPES_CHAR8_T_H
+#ifndef LLVM_LIBC_TYPES_CHAR16_T_H
+#define LLVM_LIBC_TYPES_CHAR16_T_H
 
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
-#include <stdint.h>
+#include "../llvm-libc-macros/stdint-macros.h"
 typedef uint_least16_t char16_t;
 #endif
 
-#endif // LLVM_LIBC_TYPES_CHAR8_T_H
+#endif // LLVM_LIBC_TYPES_CHAR16_T_H
diff --git a/libc/include/llvm-libc-types/char32_t.h b/libc/include/llvm-libc-types/char32_t.h
index 8e578dbbb51262..8f08fe93353a5b 100644
--- a/libc/include/llvm-libc-types/char32_t.h
+++ b/libc/include/llvm-libc-types/char32_t.h
@@ -1,4 +1,4 @@
-//===-- Definition of clock_t type ----------------------------------------===//
+//===-- Definition of char32_t type ---------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,12 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIBC_TYPES_CHAR8_T_H
-#define LLVM_LIBC_TYPES_CHAR8_T_H
+#ifndef LLVM_LIBC_TYPES_CHAR32_T_H
+#define LLVM_LIBC_TYPES_CHAR32_T_H
 
 #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
-#include <stdint.h>
+#include "../llvm-libc-macros/stdint-macros.h"
 typedef uint_least32_t char32_t;
 #endif
 
-#endif // LLVM_LIBC_TYPES_CHAR8_T_H
+#endif // LLVM_LIBC_TYPES_CHAR32_T_H
diff --git a/libc/include/llvm-libc-types/char8_t.h b/libc/include/llvm-libc-types/char8_t.h
index c55c723f27693f..ddadab1afa219d 100644
--- a/libc/include/llvm-libc-types/char8_t.h
+++ b/libc/include/llvm-libc-types/char8_t.h
@@ -1,4 +1,4 @@
-//===-- Definition of clock_t type ----------------------------------------===//
+//===-- Definition of char8_t type ----------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.



More information about the libc-commits mailing list