[clang] a68039c - [Clang] Add tests and mark as implemented WG14-N2728

Tom Honermann via cfe-commits cfe-commits at lists.llvm.org
Thu Apr 27 14:27:46 PDT 2023


Author: Tom Honermann
Date: 2023-04-27T14:25:41-07:00
New Revision: a68039c51e6123bea4a019c02b72297e0de58529

URL: https://github.com/llvm/llvm-project/commit/a68039c51e6123bea4a019c02b72297e0de58529
DIFF: https://github.com/llvm/llvm-project/commit/a68039c51e6123bea4a019c02b72297e0de58529.diff

LOG: [Clang] Add tests and mark as implemented WG14-N2728

This change expands testing of UTF-8, UTF-16, and UTF-32 character and string
literals as validation that WG14 N2728 (char16_t & char32_t string literals
shall be UTF-16 & UTF-32) has been implemented.

Reviewed By: cor3ntin, aaron.ballman

Differential Revision: https://reviews.llvm.org/D149098

Added: 
    

Modified: 
    clang/test/Lexer/char-literal.cpp
    clang/www/c_status.html

Removed: 
    


################################################################################
diff --git a/clang/test/Lexer/char-literal.cpp b/clang/test/Lexer/char-literal.cpp
index a71500f8f0107..f2d72280e66cd 100644
--- a/clang/test/Lexer/char-literal.cpp
+++ b/clang/test/Lexer/char-literal.cpp
@@ -1,5 +1,8 @@
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -Wfour-char-constants -fsyntax-only -verify %s
-// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c11 -x c -Wfour-char-constants -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -Wfour-char-constants -fsyntax-only -verify=cxx,expected %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++17 -Wfour-char-constants -fsyntax-only -verify=cxx,expected %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++20 -Wfour-char-constants -fsyntax-only -verify=cxx,expected %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c11 -x c -Wfour-char-constants -fsyntax-only -verify=c,expected %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c2x -x c -Wfour-char-constants -fsyntax-only -verify=c,expected %s
 
 #ifndef __cplusplus
 typedef __WCHAR_TYPE__ wchar_t;
@@ -38,3 +41,81 @@ char16_t q[2] = u"\U00010000";
 #ifdef __cplusplus
 // expected-error@-2 {{too long}}
 #endif
+
+// UTF-8 character literal code point ranges.
+#if __cplusplus >= 201703L || __STDC_VERSION__ >= 201710L
+_Static_assert(u8'\U00000000' == 0x00, ""); // c-error {{universal character name refers to a control character}}
+_Static_assert(u8'\U0000007F' == 0x7F, ""); // c-error {{universal character name refers to a control character}}
+_Static_assert(u8'\U00000080', ""); // c-error {{universal character name refers to a control character}}
+                                    // cxx-error@-1 {{character too large for enclosing character literal type}}
+_Static_assert((unsigned char)u8'\xFF' == (unsigned char)0xFF, "");
+#endif
+
+// UTF-8 string literal code point ranges.
+_Static_assert(u8"\U00000000"[0] == 0x00, ""); // c-error {{universal character name refers to a control character}}
+_Static_assert(u8"\U0000007F"[0] == 0x7F, ""); // c-error {{universal character name refers to a control character}}
+_Static_assert((unsigned char)u8"\U00000080"[0] == (unsigned char)0xC2, ""); // c-error {{universal character name refers to a control character}}
+_Static_assert((unsigned char)u8"\U00000080"[1] == (unsigned char)0x80, ""); // c-error {{universal character name refers to a control character}}
+_Static_assert((unsigned char)u8"\U000007FF"[0] == (unsigned char)0xDF, "");
+_Static_assert((unsigned char)u8"\U000007FF"[1] == (unsigned char)0xBF, "");
+_Static_assert((unsigned char)u8"\U00000800"[0] == (unsigned char)0xE0, "");
+_Static_assert((unsigned char)u8"\U00000800"[1] == (unsigned char)0xA0, "");
+_Static_assert((unsigned char)u8"\U00000800"[2] == (unsigned char)0x80, "");
+_Static_assert(u8"\U0000D800"[0], ""); // expected-error {{invalid universal character}}
+_Static_assert(u8"\U0000DFFF"[0], ""); // expected-error {{invalid universal character}}
+_Static_assert((unsigned char)u8"\U0000FFFF"[0] == (unsigned char)0xEF, "");
+_Static_assert((unsigned char)u8"\U0000FFFF"[1] == (unsigned char)0xBF, "");
+_Static_assert((unsigned char)u8"\U0000FFFF"[2] == (unsigned char)0xBF, "");
+_Static_assert((unsigned char)u8"\U00010000"[0] == (unsigned char)0xF0, "");
+_Static_assert((unsigned char)u8"\U00010000"[1] == (unsigned char)0x90, "");
+_Static_assert((unsigned char)u8"\U00010000"[2] == (unsigned char)0x80, "");
+_Static_assert((unsigned char)u8"\U00010000"[3] == (unsigned char)0x80, "");
+_Static_assert((unsigned char)u8"\U0010FFFF"[0] == (unsigned char)0xF4, "");
+_Static_assert((unsigned char)u8"\U0010FFFF"[1] == (unsigned char)0x8F, "");
+_Static_assert((unsigned char)u8"\U0010FFFF"[2] == (unsigned char)0xBF, "");
+_Static_assert((unsigned char)u8"\U0010FFFF"[3] == (unsigned char)0xBF, "");
+_Static_assert(u8"\U00110000"[0], ""); // expected-error {{invalid universal character}}
+
+#if !defined(__STDC_UTF_16__)
+#error __STDC_UTF_16__ is not defined.
+#endif
+#if __STDC_UTF_16__ != 1
+#error __STDC_UTF_16__ has the wrong value.
+#endif
+
+// UTF-16 character literal code point ranges.
+_Static_assert(u'\U00000000' == 0x0000, ""); // c-error {{universal character name refers to a control character}}
+_Static_assert(u'\U0000D800', ""); // expected-error {{invalid universal character}}
+_Static_assert(u'\U0000DFFF', ""); // expected-error {{invalid universal character}}
+_Static_assert(u'\U0000FFFF' == 0xFFFF, "");
+_Static_assert(u'\U00010000', ""); // expected-error {{character too large for enclosing character literal type}}
+
+// UTF-16 string literal code point ranges.
+_Static_assert(u"\U00000000"[0] == 0x0000, ""); // c-error {{universal character name refers to a control character}}
+_Static_assert(u"\U0000D800"[0], ""); // expected-error {{invalid universal character}}
+_Static_assert(u"\U0000DFFF"[0], ""); // expected-error {{invalid universal character}}
+_Static_assert(u"\U0000FFFF"[0] == 0xFFFF, "");
+_Static_assert(u"\U00010000"[0] == 0xD800, "");
+_Static_assert(u"\U00010000"[1] == 0xDC00, "");
+_Static_assert(u"\U0010FFFF"[0] == 0xDBFF, "");
+_Static_assert(u"\U0010FFFF"[1] == 0xDFFF, "");
+_Static_assert(u"\U00110000"[0], ""); // expected-error {{invalid universal character}}
+
+#if !defined(__STDC_UTF_32__)
+#error __STDC_UTF_32__ is not defined.
+#endif
+#if __STDC_UTF_32__ != 1
+#error __STDC_UTF_32__ has the wrong value.
+#endif
+
+// UTF-32 character literal code point ranges.
+_Static_assert(U'\U00000000' == 0x00000000, ""); // c-error {{universal character name refers to a control character}}
+_Static_assert(U'\U0010FFFF' == 0x0010FFFF, "");
+_Static_assert(U'\U00110000', ""); // expected-error {{invalid universal character}}
+
+// UTF-32 string literal code point ranges.
+_Static_assert(U"\U00000000"[0] == 0x00000000, ""); // c-error {{universal character name refers to a control character}}
+_Static_assert(U"\U0000D800"[0], ""); // expected-error {{invalid universal character}}
+_Static_assert(U"\U0000DFFF"[0], ""); // expected-error {{invalid universal character}}
+_Static_assert(U"\U0010FFFF"[0] == 0x0010FFFF, "");
+_Static_assert(U"\U00110000"[0], ""); // expected-error {{invalid universal character}}

diff --git a/clang/www/c_status.html b/clang/www/c_status.html
index 9ddd01b9fd445..bf2d54af8ce06 100644
--- a/clang/www/c_status.html
+++ b/clang/www/c_status.html
@@ -929,7 +929,7 @@ <h2 id="c2x">C2x implementation status</h2>
     <tr>
       <td>char16_t & char32_t string literals shall be UTF-16 & UTF-32</td>
       <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2728.htm">N2728</a></td>
-      <td class="unknown" align="center">Unknown</td>
+      <td class="full" align="center">Yes</td>
     </tr>
     <tr>
       <td>IEC 60559 binding</td>


        


More information about the cfe-commits mailing list