[clang] [C23] Implement WG14 N2653 char8_t: A type for UTF-8 characters and strings (PR #98945)
Aaron Ballman via cfe-commits
cfe-commits at lists.llvm.org
Mon Jul 15 11:48:25 PDT 2024
https://github.com/AaronBallman created https://github.com/llvm/llvm-project/pull/98945
None
>From f820346e5dbdb1d6aafc0ef3405752f6c7fb16ed Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron at aaronballman.com>
Date: Mon, 15 Jul 2024 14:41:57 -0400
Subject: [PATCH] [C23] Implement WG14 N2653 char8_t: A type for UTF-8
characters and strings
---
clang/docs/ReleaseNotes.rst | 7 +++++
clang/lib/Frontend/InitPreprocessor.cpp | 2 +-
clang/lib/Headers/stdatomic.h | 6 ++++
clang/lib/Sema/SemaExpr.cpp | 2 ++
clang/test/C/C23/n2653.c | 39 +++++++++++++++++++++++++
clang/www/c_status.html | 2 +-
6 files changed, 56 insertions(+), 2 deletions(-)
create mode 100644 clang/test/C/C23/n2653.c
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index a34f109ba21ce..c99493b6ca7dc 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -361,6 +361,13 @@ C23 Feature Support
- Added the ``FLT_NORM_MAX``, ``DBL_NORM_MAX``, and ``LDBL_NORM_MAX`` to the
freestanding implementation of ``<float.h>`` that ships with Clang.
+- Implemented support for
+ `WG14 N2653 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2653.htm>`_
+ which changes the underlying type of ``u8`` character and string constants
+ from ``char`` to ``char8_t``, which is a type defined in ``<uchar.h>`` as
+ being the same type as ``unsigned char``. Also adds support for the type in
+ ``<stdatomic.h>`` in C23 and later modes.
+
Non-comprehensive list of changes in this release
-------------------------------------------------
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index d40d78a38540b..849b97fcf75b8 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -1349,7 +1349,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
getLockFreeValue(TI.get##Type##Width(), TI));
DEFINE_LOCK_FREE_MACRO(BOOL, Bool);
DEFINE_LOCK_FREE_MACRO(CHAR, Char);
- if (LangOpts.Char8)
+ if (LangOpts.Char8 || LangOpts.C23)
DEFINE_LOCK_FREE_MACRO(CHAR8_T, Char); // Treat char8_t like char.
DEFINE_LOCK_FREE_MACRO(CHAR16_T, Char16);
DEFINE_LOCK_FREE_MACRO(CHAR32_T, Char32);
diff --git a/clang/lib/Headers/stdatomic.h b/clang/lib/Headers/stdatomic.h
index 9c103d98af8c5..2027055f38796 100644
--- a/clang/lib/Headers/stdatomic.h
+++ b/clang/lib/Headers/stdatomic.h
@@ -35,6 +35,9 @@ extern "C" {
#define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE
#define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
+#define ATOMIC_CHAR8_T_LOCK_FREE __CLANG_ATOMIC_CHAR8_T_LOCK_FREE
+#endif
#define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE
#define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE
#define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE
@@ -104,6 +107,9 @@ typedef _Atomic(long) atomic_long;
typedef _Atomic(unsigned long) atomic_ulong;
typedef _Atomic(long long) atomic_llong;
typedef _Atomic(unsigned long long) atomic_ullong;
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
+typedef _Atomic(unsigned char) atomic_char8_t;
+#endif
typedef _Atomic(uint_least16_t) atomic_char16_t;
typedef _Atomic(uint_least32_t) atomic_char32_t;
typedef _Atomic(wchar_t) atomic_wchar_t;
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index d6b85cbcaf56b..17a920df66fec 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -2051,6 +2051,8 @@ Sema::ActOnStringLiteral(ArrayRef<Token> StringToks, Scope *UDLScope) {
} else if (Literal.isUTF8()) {
if (getLangOpts().Char8)
CharTy = Context.Char8Ty;
+ else if (getLangOpts().C23)
+ CharTy = Context.UnsignedCharTy;
Kind = StringLiteralKind::UTF8;
} else if (Literal.isUTF16()) {
CharTy = Context.Char16Ty;
diff --git a/clang/test/C/C23/n2653.c b/clang/test/C/C23/n2653.c
new file mode 100644
index 0000000000000..72c8af561db26
--- /dev/null
+++ b/clang/test/C/C23/n2653.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -verify=pre-c23 -ffreestanding -std=c17 %s
+// RUN: %clang_cc1 -verify -ffreestanding -std=c23 %s
+
+/* WG14 N2653: Clang 19
+ * char8_t: A type for UTF-8 characters and strings
+ */
+
+// expected-no-diagnostics
+
+#include <stdatomic.h>
+
+typedef unsigned char char8_t; // in <uchar.h>, which Clang does not provide.
+
+#if __STDC_VERSION__ >= 202311L
+ #define LITERAL_TYPE char8_t
+ #define LITERAL_UNDERLYING_TYPE unsigned char
+
+ // Ensure that char8_t has the same lock-free capabilities as unsigned char.
+ #if defined(ATOMIC_CHAR8_T_LOCK_FREE) != defined(ATOMIC_CHAR_LOCK_FREE) || \
+ ATOMIC_CHAR8_T_LOCK_FREE != ATOMIC_CHAR_LOCK_FREE
+ #error "invalid char8_t atomic lock free status"
+ #endif
+
+#else
+ #define LITERAL_TYPE char
+ #define LITERAL_UNDERLYING_TYPE char
+
+ // Ensure we don't define the lock-free status in earlier modes.
+ #if defined(ATOMIC_CHAR8_T_LOCK_FREE)
+ #error "ATOMIC_CHAR8_T_LOCK_FREE should not be defined"
+ #endif
+#endif
+
+// Ensure we get the type of the literal correct.
+_Static_assert(_Generic(u8""[0], LITERAL_TYPE : 1, default : 0), "");
+_Static_assert(_Generic(u8""[0], LITERAL_UNDERLYING_TYPE : 1, default : 0), "");
+
+// Ensure we have a datatype for atomic operations.
+atomic_char8_t val; // pre-c23-error {{unknown type name 'atomic_char8_t'}}
diff --git a/clang/www/c_status.html b/clang/www/c_status.html
index 669448635837e..3ea70b0163c70 100644
--- a/clang/www/c_status.html
+++ b/clang/www/c_status.html
@@ -1066,7 +1066,7 @@ <h2 id="c2x">C23 implementation status</h2>
<tr>
<td>char8_t: A type for UTF-8 characters and strings</td>
<td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2653.htm">N2653</a></td>
- <td class="none" align="center">No</td>
+ <td class="unreleased" align="center">Clang 19</td>
</tr>
<tr>
<td>Clarification for max exponent macros-update</td>
More information about the cfe-commits
mailing list