[libc-commits] [libc] cd7a7a5 - Add basic char*_t support for libc (partial WG14 N2653) (#90360)

via libc-commits libc-commits at lists.llvm.org
Tue Apr 30 15:08:42 PDT 2024


Author: Fabian Keßler
Date: 2024-04-30T15:08:38-07:00
New Revision: cd7a7a56fc73c73855036f77a4f69ea90c75c27a

URL: https://github.com/llvm/llvm-project/commit/cd7a7a56fc73c73855036f77a4f69ea90c75c27a
DIFF: https://github.com/llvm/llvm-project/commit/cd7a7a56fc73c73855036f77a4f69ea90c75c27a.diff

LOG: Add basic char*_t support for libc (partial WG14 N2653) (#90360)

This PR implements a part of WG14 N2653:
 - Define C23 char8_t
 - Define C11 char16_t
 - Define C11 char32_t
 
 Missing goals are:
- The type of UTF-8 character literals is changed from unsigned char to
char8_t. (Since UTF-8 character literals already have type unsigned
char, this is not a semantic change).
- New mbrtoc8() and c8rtomb() functions declared in <uchar.h> enable
conversions between multibyte characters and UTF-8.
    - A new ATOMIC_CHAR8_T_LOCK_FREE macro.
    - A new atomic_char8_t typedef name.

Added: 
    libc/include/llvm-libc-types/char16_t.h
    libc/include/llvm-libc-types/char32_t.h
    libc/include/llvm-libc-types/char8_t.h

Modified: 
    libc/config/baremetal/api.td
    libc/config/linux/aarch64/headers.txt
    libc/config/linux/api.td
    libc/config/linux/arm/headers.txt
    libc/config/linux/riscv/headers.txt
    libc/config/linux/x86_64/headers.txt
    libc/docs/c23.rst
    libc/include/CMakeLists.txt
    libc/include/llvm-libc-types/CMakeLists.txt
    libc/spec/spec.td
    libc/spec/stdc.td

Removed: 
    


################################################################################
diff  --git a/libc/config/baremetal/api.td b/libc/config/baremetal/api.td
index 25aa06aacb642e..a6547d843c85ee 100644
--- a/libc/config/baremetal/api.td
+++ b/libc/config/baremetal/api.td
@@ -85,5 +85,10 @@ def TimeAPI : PublicAPI<"time.h"> {
 }
 
 def UCharAPI : PublicAPI<"uchar.h"> {
-  let Types = ["mbstate_t"];
+  let Types = [
+    "mbstate_t",
+    "char8_t",
+    "char16_t",
+    "char32_t",
+  ];
 }

diff  --git a/libc/config/linux/aarch64/headers.txt b/libc/config/linux/aarch64/headers.txt
index 47db4434b09b33..7d25877cefcc83 100644
--- a/libc/config/linux/aarch64/headers.txt
+++ b/libc/config/linux/aarch64/headers.txt
@@ -25,6 +25,8 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.threads
     libc.include.time
     libc.include.unistd
+    libc.include.wchar
+    libc.include.uchar
 
     libc.include.sys_ioctl
     # Disabled due to epoll_wait syscalls not being available on this platform.

diff  --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 7843513c4d27bb..902839b3e5b8fe 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -206,6 +206,15 @@ def WCharAPI : PublicAPI<"wchar.h"> {
   ];
 }
 
+def UCharAPI : PublicAPI<"uchar.h"> {
+  let Types = [
+    "mbstate_t",
+    "char8_t",
+    "char16_t",
+    "char32_t",
+  ];
+}
+
 def SysRandomAPI : PublicAPI<"sys/random.h"> {
   let Types = ["size_t", "ssize_t"];
 }

diff  --git a/libc/config/linux/arm/headers.txt b/libc/config/linux/arm/headers.txt
index 307bb6b146a4cc..1180564fe458c8 100644
--- a/libc/config/linux/arm/headers.txt
+++ b/libc/config/linux/arm/headers.txt
@@ -12,6 +12,8 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.string
     libc.include.strings
     libc.include.search
+    libc.include.wchar
+    libc.include.uchar
 
     # Disabled due to epoll_wait syscalls not being available on this platform.
     # libc.include.sys_epoll

diff  --git a/libc/config/linux/riscv/headers.txt b/libc/config/linux/riscv/headers.txt
index c858bcc978d9da..da203e9850603f 100644
--- a/libc/config/linux/riscv/headers.txt
+++ b/libc/config/linux/riscv/headers.txt
@@ -28,6 +28,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.time
     libc.include.unistd
     libc.include.wchar
+    libc.include.uchar
 
     libc.include.arpa_inet
 

diff  --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt
index e51c7931942706..44d640b75e2bf7 100644
--- a/libc/config/linux/x86_64/headers.txt
+++ b/libc/config/linux/x86_64/headers.txt
@@ -29,6 +29,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.time
     libc.include.unistd
     libc.include.wchar
+    libc.include.uchar
 
     libc.include.arpa_inet
 

diff  --git a/libc/docs/c23.rst b/libc/docs/c23.rst
index 44724fe1660cbe..8ccfd46271797b 100644
--- a/libc/docs/c23.rst
+++ b/libc/docs/c23.rst
@@ -158,4 +158,4 @@ Additions:
 
   * mbrtoc8
   * c8rtomb
-  * char*_t
+  * char*_t |check|

diff  --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index aeef46aabfce5c..6dea8e539969d0 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -603,6 +603,9 @@ add_gen_header(
   DEPENDS
     .llvm_libc_common_h
     .llvm-libc-types.mbstate_t
+    .llvm-libc-types.char8_t
+    .llvm-libc-types.char16_t
+    .llvm-libc-types.char32_t
 )
 
 add_gen_header(

diff  --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index 310374fb62ffe0..16e343d6f3448c 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -90,6 +90,21 @@ add_header(tcflag_t HDR tcflag_t.h)
 add_header(struct_termios HDR struct_termios.h DEPENDS .cc_t .speed_t .tcflag_t)
 add_header(__getoptargv_t HDR __getoptargv_t.h)
 add_header(wchar_t HDR wchar_t.h)
+add_header(char8_t HDR char8_t.h)
+add_header(
+    char16_t 
+  HDR 
+    char16_t.h
+  DEPENDS
+    libc.include.llvm-libc-macros.stdint_macros
+)
+add_header(
+    char32_t 
+  HDR 
+    char32_t.h
+  DEPENDS
+    libc.include.llvm-libc-macros.stdint_macros
+)
 add_header(wint_t HDR wint_t.h)
 add_header(sa_family_t HDR sa_family_t.h)
 add_header(socklen_t HDR socklen_t.h)

diff  --git a/libc/include/llvm-libc-types/char16_t.h b/libc/include/llvm-libc-types/char16_t.h
new file mode 100644
index 00000000000000..1f5847ae771b41
--- /dev/null
+++ b/libc/include/llvm-libc-types/char16_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of char16_t type ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_CHAR16_T_H
+#define LLVM_LIBC_TYPES_CHAR16_T_H
+
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+#include "../llvm-libc-macros/stdint-macros.h"
+typedef uint_least16_t char16_t;
+#endif
+
+#endif // LLVM_LIBC_TYPES_CHAR16_T_H

diff  --git a/libc/include/llvm-libc-types/char32_t.h b/libc/include/llvm-libc-types/char32_t.h
new file mode 100644
index 00000000000000..20b72dc5d67e30
--- /dev/null
+++ b/libc/include/llvm-libc-types/char32_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of char32_t type ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_CHAR32_T_H
+#define LLVM_LIBC_TYPES_CHAR32_T_H
+
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
+#include "../llvm-libc-macros/stdint-macros.h"
+typedef uint_least32_t char32_t;
+#endif
+
+#endif // LLVM_LIBC_TYPES_CHAR32_T_H

diff  --git a/libc/include/llvm-libc-types/char8_t.h b/libc/include/llvm-libc-types/char8_t.h
new file mode 100644
index 00000000000000..ddadab1afa219d
--- /dev/null
+++ b/libc/include/llvm-libc-types/char8_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of char8_t type ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_CHAR8_T_H
+#define LLVM_LIBC_TYPES_CHAR8_T_H
+
+#if !defined(__cplusplus) && defined(__STDC_VERSION__) &&                      \
+    __STDC_VERSION__ >= 202311L
+typedef unsigned char char8_t;
+#endif
+
+#endif // LLVM_LIBC_TYPES_CHAR8_T_H

diff  --git a/libc/spec/spec.td b/libc/spec/spec.td
index 87bf4435e16724..ea8fa4cd373cf3 100644
--- a/libc/spec/spec.td
+++ b/libc/spec/spec.td
@@ -65,6 +65,9 @@ def SizeTType : NamedType<"size_t">;
 def SizeTPtr : PtrType<SizeTType>;
 def RestrictedSizeTPtr : RestrictedPtrType<SizeTType>;
 
+def Char8TType : NamedType<"char8_t">;
+def Char16TType : NamedType<"char16_t">;
+def Char32TType : NamedType<"char32_t">;
 def WCharType : NamedType<"wchar_t">;
 def WIntType : NamedType<"wint_t">;
 

diff  --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 7a9031a0a330bb..eb67c9b0b009ba 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -1398,6 +1398,9 @@ def StdC : StandardSpec<"stdc"> {
       [], // Macros
       [ //Types
         MBStateTType,
+        Char8TType,
+        Char16TType,
+        Char32TType,
       ],
       [], // Enumerations
       []


        


More information about the libc-commits mailing list