[clang] [C23] Implement WG14 N2653 char8_t: A type for UTF-8 characters and strings (PR #98945)

Aaron Ballman via cfe-commits cfe-commits at lists.llvm.org
Mon Jul 15 11:48:25 PDT 2024


https://github.com/AaronBallman created https://github.com/llvm/llvm-project/pull/98945

None

>From f820346e5dbdb1d6aafc0ef3405752f6c7fb16ed Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Mon, 15 Jul 2024 14:41:57 -0400
Subject: [PATCH] [C23] Implement WG14 N2653 char8_t: A type for UTF-8
 characters and strings

---
 clang/docs/ReleaseNotes.rst             |  7 +++++
 clang/lib/Frontend/InitPreprocessor.cpp |  2 +-
 clang/lib/Headers/stdatomic.h           |  6 ++++
 clang/lib/Sema/SemaExpr.cpp             |  2 ++
 clang/test/C/C23/n2653.c                | 39 +++++++++++++++++++++++++
 clang/www/c_status.html                 |  2 +-
 6 files changed, 56 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/C/C23/n2653.c

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index a34f109ba21ce..c99493b6ca7dc 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -361,6 +361,13 @@ C23 Feature Support
 - Added the ``FLT_NORM_MAX``, ``DBL_NORM_MAX``, and ``LDBL_NORM_MAX`` to the
   freestanding implementation of ``<float.h>`` that ships with Clang.
 
+- Implemented support for
+  `WG14 N2653 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2653.htm>`_
+  which changes the underlying type of ``u8`` character and string constants
+  from ``char`` to ``char8_t``, which is a type defined in ``<uchar.h>`` as
+  being the same type as ``unsigned char``. Also adds support for the type in
+  ``<stdatomic.h>`` in C23 and later modes.
+
 Non-comprehensive list of changes in this release
 -------------------------------------------------
 
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index d40d78a38540b..849b97fcf75b8 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -1349,7 +1349,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
                       getLockFreeValue(TI.get##Type##Width(), TI));
     DEFINE_LOCK_FREE_MACRO(BOOL, Bool);
     DEFINE_LOCK_FREE_MACRO(CHAR, Char);
-    if (LangOpts.Char8)
+    if (LangOpts.Char8 || LangOpts.C23)
       DEFINE_LOCK_FREE_MACRO(CHAR8_T, Char); // Treat char8_t like char.
     DEFINE_LOCK_FREE_MACRO(CHAR16_T, Char16);
     DEFINE_LOCK_FREE_MACRO(CHAR32_T, Char32);
diff --git a/clang/lib/Headers/stdatomic.h b/clang/lib/Headers/stdatomic.h
index 9c103d98af8c5..2027055f38796 100644
--- a/clang/lib/Headers/stdatomic.h
+++ b/clang/lib/Headers/stdatomic.h
@@ -35,6 +35,9 @@ extern "C" {
 
 #define ATOMIC_BOOL_LOCK_FREE       __CLANG_ATOMIC_BOOL_LOCK_FREE
 #define ATOMIC_CHAR_LOCK_FREE       __CLANG_ATOMIC_CHAR_LOCK_FREE
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
+#define ATOMIC_CHAR8_T_LOCK_FREE    __CLANG_ATOMIC_CHAR8_T_LOCK_FREE
+#endif
 #define ATOMIC_CHAR16_T_LOCK_FREE   __CLANG_ATOMIC_CHAR16_T_LOCK_FREE
 #define ATOMIC_CHAR32_T_LOCK_FREE   __CLANG_ATOMIC_CHAR32_T_LOCK_FREE
 #define ATOMIC_WCHAR_T_LOCK_FREE    __CLANG_ATOMIC_WCHAR_T_LOCK_FREE
@@ -104,6 +107,9 @@ typedef _Atomic(long)               atomic_long;
 typedef _Atomic(unsigned long)      atomic_ulong;
 typedef _Atomic(long long)          atomic_llong;
 typedef _Atomic(unsigned long long) atomic_ullong;
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
+typedef _Atomic(unsigned char)      atomic_char8_t;
+#endif
 typedef _Atomic(uint_least16_t)     atomic_char16_t;
 typedef _Atomic(uint_least32_t)     atomic_char32_t;
 typedef _Atomic(wchar_t)            atomic_wchar_t;
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index d6b85cbcaf56b..17a920df66fec 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -2051,6 +2051,8 @@ Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) {
   } else if (Literal.isUTF8()) {
     if (getLangOpts().Char8)
       CharTy = Context.Char8Ty;
+    else if (getLangOpts().C23)
+      CharTy = Context.UnsignedCharTy;
     Kind = StringLiteralKind::UTF8;
   } else if (Literal.isUTF16()) {
     CharTy = Context.Char16Ty;
diff --git a/clang/test/C/C23/n2653.c b/clang/test/C/C23/n2653.c
new file mode 100644
index 0000000000000..72c8af561db26
--- /dev/null
+++ b/clang/test/C/C23/n2653.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -verify=pre-c23 -ffreestanding -std=c17 %s
+// RUN: %clang_cc1 -verify -ffreestanding -std=c23 %s
+
+/* WG14 N2653: Clang 19
+ * char8_t: A type for UTF-8 characters and strings
+ */
+
+// expected-no-diagnostics
+
+#include <stdatomic.h>
+
+typedef unsigned char char8_t;  // in <uchar.h>, which Clang does not provide.
+
+#if __STDC_VERSION__ >= 202311L
+  #define LITERAL_TYPE char8_t
+  #define LITERAL_UNDERLYING_TYPE unsigned char
+
+  // Ensure that char8_t has the same lock-free capabilities as unsigned char.
+  #if defined(ATOMIC_CHAR8_T_LOCK_FREE) != defined(ATOMIC_CHAR_LOCK_FREE) || \
+      ATOMIC_CHAR8_T_LOCK_FREE != ATOMIC_CHAR_LOCK_FREE
+    #error "invalid char8_t atomic lock free status"
+  #endif
+
+#else
+  #define LITERAL_TYPE char
+  #define LITERAL_UNDERLYING_TYPE char
+
+  // Ensure we don't define the lock-free status in earlier modes.
+  #if defined(ATOMIC_CHAR8_T_LOCK_FREE)
+    #error "ATOMIC_CHAR8_T_LOCK_FREE should not be defined"
+  #endif
+#endif
+
+// Ensure we get the type of the literal correct.
+_Static_assert(_Generic(u8""[0], LITERAL_TYPE : 1, default : 0), "");
+_Static_assert(_Generic(u8""[0], LITERAL_UNDERLYING_TYPE : 1, default : 0), "");
+
+// Ensure we have a datatype for atomic operations.
+atomic_char8_t val; // pre-c23-error {{unknown type name 'atomic_char8_t'}}
diff --git a/clang/www/c_status.html b/clang/www/c_status.html
index 669448635837e..3ea70b0163c70 100644
--- a/clang/www/c_status.html
+++ b/clang/www/c_status.html
@@ -1066,7 +1066,7 @@ <h2 id="c2x">C23 implementation status</h2>
     <tr>
       <td>char8_t: A type for UTF-8 characters and strings</td>
       <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2653.htm">N2653</a></td>
-      <td class="none" align="center">No</td>
+      <td class="unreleased" align="center">Clang 19</td>
     </tr>
     <tr>
       <td>Clarification for max exponent macros-update</td>



More information about the cfe-commits mailing list