[clang] d0d2772 - [Clang] Implement P2513

Corentin Jabot via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 24 09:10:49 PDT 2022


Author: Corentin Jabot
Date: 2022-10-24T18:10:43+02:00
New Revision: d0d2772379bd89f1dce3c456520272678cf4b966

URL: https://github.com/llvm/llvm-project/commit/d0d2772379bd89f1dce3c456520272678cf4b966
DIFF: https://github.com/llvm/llvm-project/commit/d0d2772379bd89f1dce3c456520272678cf4b966.diff

LOG: [Clang] Implement P2513

Implement P2513

This change allows initializing an array of char or unsigned char
from a u8 string literal.
This was done both to support legacy code and for compatibility
with C, where char8_t will be a typedef for unsigned char.

This is backported to C++20 as per WG21 guidance.

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D136449

Added: 
    

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/DiagnosticSemaKinds.td
    clang/lib/Frontend/InitPreprocessor.cpp
    clang/lib/Sema/SemaInit.cpp
    clang/test/Lexer/cxx-features.cpp
    clang/test/SemaCXX/char8_t.cpp
    clang/test/SemaCXX/cxx2a-compat.cpp
    clang/www/cxx_status.html

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 68cee534513a..49ef53f33877 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -557,6 +557,8 @@ C++2b Feature Support
 
 - Support label at end of compound statement (`P2324 <https://wg21.link/p2324r2>`_).
 - Implemented `P1169R4: static operator() <https://wg21.link/P1169R4>`_.
+- Implemented "char8_t Compatibility and Portability Fix" (`P2513R3 <https://wg21.link/P2513R3>`_).
+  This change was applied to C++20 as a Defect Report.
 
 CUDA/HIP Language Changes in Clang
 ----------------------------------
@@ -654,8 +656,8 @@ libclang
   the behavior of ``QualType::getNonReferenceType`` for ``CXType``.
 - Introduced the new function ``clang_CXXMethod_isDeleted``, which queries
   whether the method is declared ``= delete``.
-- ``clang_Cursor_getNumTemplateArguments``, ``clang_Cursor_getTemplateArgumentKind``, 
-  ``clang_Cursor_getTemplateArgumentType``, ``clang_Cursor_getTemplateArgumentValue`` and 
+- ``clang_Cursor_getNumTemplateArguments``, ``clang_Cursor_getTemplateArgumentKind``,
+  ``clang_Cursor_getTemplateArgumentType``, ``clang_Cursor_getTemplateArgumentValue`` and
   ``clang_Cursor_getTemplateArgumentUnsignedValue`` now work on struct, class,
   and partial template specialization cursors in addition to function cursors.
 

diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index b1d475772502..8cf73784d97b 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6868,8 +6868,8 @@ def err_array_init_plain_string_into_char8_t : Error<
 def note_array_init_plain_string_into_char8_t : Note<
   "add 'u8' prefix to form a 'char8_t' string literal">;
 def err_array_init_utf8_string_into_char : Error<
-  "%select{|ISO C++20 does not permit }0initialization of char array with "
-  "UTF-8 string literal%select{ is not permitted by '-fchar8_t'|}0">;
+  "initialization of %select{|signed }0char array with "
+  "UTF-8 string literal is not permitted by %select{'-fchar8_t'|C++20}1">;
 def warn_cxx20_compat_utf8_string : Warning<
   "type of UTF-8 string literal will change from array of const char to "
   "array of const char8_t in C++20">, InGroup<CXX20Compat>, DefaultIgnore;

diff  --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 2273fb113fb2..96b93dcdf044 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -705,7 +705,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts,
   Builder.defineMacro("__cpp_named_character_escapes", "202207L");
 
   if (LangOpts.Char8)
-    Builder.defineMacro("__cpp_char8_t", "201811L");
+    Builder.defineMacro("__cpp_char8_t", "202207L");
   Builder.defineMacro("__cpp_impl_destroying_delete", "201806L");
 
   // TS features.

diff  --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index db5580c9e55c..7ebf6997e27e 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -81,10 +81,20 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT,
   const QualType ElemTy =
       Context.getCanonicalType(AT->getElementType()).getUnqualifiedType();
 
+  auto IsCharOrUnsignedChar = [](const QualType &T) {
+    const BuiltinType *BT = dyn_cast<BuiltinType>(T.getTypePtr());
+    return BT && BT->isCharType() && BT->getKind() != BuiltinType::SChar;
+  };
+
   switch (SL->getKind()) {
   case StringLiteral::UTF8:
     // char8_t array can be initialized with a UTF-8 string.
-    if (ElemTy->isChar8Type())
+    // - C++20 [dcl.init.string] (DR)
+    //   Additionally, an array of char or unsigned char may be initialized
+    //   by a UTF-8 string literal.
+    if (ElemTy->isChar8Type() ||
+        (Context.getLangOpts().Char8 &&
+         IsCharOrUnsignedChar(ElemTy.getCanonicalType())))
       return SIF_None;
     [[fallthrough]];
   case StringLiteral::Ordinary:
@@ -9114,9 +9124,8 @@ bool InitializationSequence::Diagnose(Sema &S,
         << FixItHint::CreateInsertion(Args.front()->getBeginLoc(), "u8");
     break;
   case FK_UTF8StringIntoPlainChar:
-    S.Diag(Kind.getLocation(),
-           diag::err_array_init_utf8_string_into_char)
-      << S.getLangOpts().CPlusPlus20;
+    S.Diag(Kind.getLocation(), diag::err_array_init_utf8_string_into_char)
+        << DestType->isSignedIntegerType() << S.getLangOpts().CPlusPlus20;
     break;
   case FK_ArrayTypeMismatch:
   case FK_NonConstantArrayInit:

diff  --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp
index ee52017a2201..c12f2d20b66c 100644
--- a/clang/test/Lexer/cxx-features.cpp
+++ b/clang/test/Lexer/cxx-features.cpp
@@ -66,9 +66,9 @@
 #error "wrong value for __cpp_aggregate_paren_init"
 #endif
 
-#if defined(CHAR8_T) ? check(char8_t, 201811, 201811, 201811, 201811, 201811, 201811) : \
+#if defined(CHAR8_T) ? check(char8_t, 202207, 202207, 202207, 202207, 202207, 202207) : \
     defined(NO_CHAR8_T) ? check(char8_t, 0, 0, 0, 0, 0, 0) : \
-    check(char8_t, 0, 0, 0, 0, 201811, 201811)
+    check(char8_t, 0, 0, 0, 0, 202207, 202207)
 #error "wrong value for __cpp_char8_t"
 #endif
 

diff  --git a/clang/test/SemaCXX/char8_t.cpp b/clang/test/SemaCXX/char8_t.cpp
index f60a66dbe84d..5ffa550847de 100644
--- a/clang/test/SemaCXX/char8_t.cpp
+++ b/clang/test/SemaCXX/char8_t.cpp
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fchar8_t -std=c++17 -verify %s
-// RUN: %clang_cc1 -std=c++2a -verify %s
+// RUN: %clang_cc1 -std=c++2a -verify=expected %s
+// RUN: %clang_cc1 -std=c++2a -verify=expected -fno-signed-char %s
+
 
 char8_t a = u8'a';
 char8_t b[] = u8"foo";
@@ -7,15 +9,35 @@ char8_t c = 'a';
 char8_t d[] = "foo"; // expected-error {{initializing 'char8_t' array with plain string literal}} expected-note {{add 'u8' prefix}}
 
 char e = u8'a';
-char f[] = u8"foo";
-#if __cplusplus <= 201703L
-// expected-error@-2 {{initialization of char array with UTF-8 string literal is not permitted by '-fchar8_t'}}
-#else
-// expected-error@-4 {{ISO C++20 does not permit initialization of char array with UTF-8 string literal}}
-#endif
 char g = 'a';
 char h[] = "foo";
 
+unsigned char i[] = u8"foo";
+unsigned char j[] = { u8"foo" };
+char k[] = u8"foo";
+char l[] = { u8"foo" };
+signed char m[] = u8"foo"; // expected-error {{initialization of char array with UTF-8 string literal is not permitted}}
+signed char n[] = { u8"foo" }; // expected-error {{cannot initialize an array element of type 'signed char' with an lvalue of type 'const char8_t[4]'}}
+
+const unsigned char* uptr = u8"foo"; // expected-error {{cannot initialize}}
+const signed char* sptr = u8"foo"; // expected-error {{cannot initialize}}
+const char* ptr = u8"foo"; // expected-error {{cannot initialize}}
+
+template <typename T>
+void check_values() {
+  constexpr T c[] = {0, static_cast<T>(0xFF), 0x42};
+  constexpr T a[] = u8"\x00\xFF\x42";
+
+  static_assert(a[0] == c[0]);
+  static_assert(a[1] == c[1]);
+  static_assert(a[2] == c[2]);
+}
+
+void call_check_values() {
+  check_values<char>();
+  check_values<unsigned char>();
+}
+
 void disambig() {
   char8_t (a) = u8'x';
 }
@@ -48,3 +70,21 @@ void check_deduction() {
 static_assert(sizeof(char8_t) == 1);
 static_assert(char8_t(-1) > 0);
 static_assert(u8"\u0080"[0] > 0);
+
+namespace ambiguous {
+
+struct A {
+	char8_t s[10];
+};
+struct B {
+  char s[10];
+};
+
+void f(A); // expected-note {{candidate}}
+void f(B); // expected-note {{candidate}}
+
+int test() {
+  f({u8"foo"}); // expected-error {{call to 'f' is ambiguous}}
+}
+
+}

diff  --git a/clang/test/SemaCXX/cxx2a-compat.cpp b/clang/test/SemaCXX/cxx2a-compat.cpp
index 0e9eafdc9b70..4f20cf59b65f 100644
--- a/clang/test/SemaCXX/cxx2a-compat.cpp
+++ b/clang/test/SemaCXX/cxx2a-compat.cpp
@@ -33,9 +33,8 @@ string u8str = u8"test" u8"test";
 // expected-warning@-4 {{type of UTF-8 string literal will change}} expected-note@-4 {{remove 'u8' prefix}}
 // expected-warning@-4 {{type of UTF-8 string literal will change}} expected-note@-4 {{remove 'u8' prefix}}
 #else
-// expected-error@-8 {{ISO C++20 does not permit initialization of char array with UTF-8 string literal}}
-// expected-error@-8 {{cannot initialize a variable of type 'const char *' with an lvalue of type 'const char8_t[6]'}}
-// expected-error@-8 {{no viable conversion from 'const char8_t[9]' to 'string'}}
+// expected-error@-7 {{cannot initialize a variable of type 'const char *' with an lvalue of type 'const char8_t[6]'}}
+// expected-error@-7 {{no viable conversion from 'const char8_t[9]' to 'string'}}
 #endif
 
 template<bool b>

diff  --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html
index cbea6bb5e1e2..d46e7bb0b46a 100755
--- a/clang/www/cxx_status.html
+++ b/clang/www/cxx_status.html
@@ -1502,7 +1502,7 @@ <h2 id="cxx23">C++2b implementation status</h2>
     <tr>
       <td><code>char8_t</code> Compatibility and Portability Fix</td>
       <td><a href="https://wg21.link/P2513R3">P2513R3</a></td>
-      <td class="none" align="center">No</td>
+      <td class="unreleased" align="center">Clang 16</td>
     </tr>
     <tr>
       <td>Relax requirements on <code>wchar_t</code> to match existing practices</td>


        


More information about the cfe-commits mailing list