[PATCH] D119221: [clang][lexer] Allow u8 character literal prefixes in C2x

Timm Bäder via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Sun Feb 13 23:49:36 PST 2022


tbaeder updated this revision to Diff 408341.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D119221/new/

https://reviews.llvm.org/D119221

Files:
  clang/lib/Lex/Lexer.cpp
  clang/lib/Sema/SemaExpr.cpp
  clang/test/Lexer/utf8-char-literal.cpp


Index: clang/test/Lexer/utf8-char-literal.cpp
===================================================================
--- clang/test/Lexer/utf8-char-literal.cpp
+++ clang/test/Lexer/utf8-char-literal.cpp
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -fsyntax-only -verify %s
 // RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c11 -x c -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c2x -DC2X -x c -fsyntax-only -verify %s
 // RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++1z -fsyntax-only -verify %s
 
 int array0[u'ñ' == u'\xf1'? 1 : -1];
@@ -12,4 +13,16 @@
 char d = u8'\u1234'; // expected-error {{character too large for enclosing character literal type}}
 char e = u8'ሴ'; // expected-error {{character too large for enclosing character literal type}}
 char f = u8'ab'; // expected-error {{Unicode character literals may not contain multiple characters}}
+#elif defined(C2X)
+char a = u8'ñ';      // expected-error {{character too large for enclosing character literal type}}
+char b = u8'\x80';   // ok
+char c = u8'\u0080'; // expected-error {{universal character name refers to a control character}}
+char d = u8'\u1234'; // expected-error {{character too large for enclosing character literal type}}
+char e = u8'ሴ';      // expected-error {{character too large for enclosing character literal type}}
+char f = u8'ab';     // expected-error {{Unicode character literals may not contain multiple characters}}
+_Static_assert(
+    _Generic(u8'a',
+             default : 0,
+             unsigned char : 1),
+    "Surprise!");
 #endif
Index: clang/lib/Sema/SemaExpr.cpp
===================================================================
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -3551,6 +3551,8 @@
   QualType Ty;
   if (Literal.isWide())
     Ty = Context.WideCharTy; // L'x' -> wchar_t in C and C++.
+  else if (Literal.isUTF8() && getLangOpts().C2x)
+    Ty = Context.UnsignedCharTy; // u8'x' -> unsigned char in C2x
   else if (Literal.isUTF8() && getLangOpts().Char8)
     Ty = Context.Char8Ty; // u8'x' -> char8_t when it exists.
   else if (Literal.isUTF16())
Index: clang/lib/Lex/Lexer.cpp
===================================================================
--- clang/lib/Lex/Lexer.cpp
+++ clang/lib/Lex/Lexer.cpp
@@ -3459,7 +3459,10 @@
     MIOpt.ReadToken();
     return LexNumericConstant(Result, CurPtr);
 
-  case 'u':   // Identifier (uber) or C11/C++11 UTF-8 or UTF-16 string literal
+  // Identifer (e.g., uber), or
+  // UTF-8 (C2x/C++17) or UTF-16 (C11/C++11) character literal, or
+  // UTF-8 or UTF-16 string literal (C11/C++11).
+  case 'u':
     // Notify MIOpt that we read a non-whitespace/non-comment token.
     MIOpt.ReadToken();
 
@@ -3493,7 +3496,7 @@
                                ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                                            SizeTmp2, Result),
                                tok::utf8_string_literal);
-        if (Char2 == '\'' && LangOpts.CPlusPlus17)
+        if (Char2 == '\'' && (LangOpts.CPlusPlus17 || LangOpts.C2x))
           return LexCharConstant(
               Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
                                   SizeTmp2, Result),
@@ -3517,7 +3520,7 @@
     // treat u like the start of an identifier.
     return LexIdentifierContinue(Result, CurPtr);
 
-  case 'U':   // Identifier (Uber) or C11/C++11 UTF-32 string literal
+  case 'U': // Identifier (e.g. Uber) or C11/C++11 UTF-32 string literal
     // Notify MIOpt that we read a non-whitespace/non-comment token.
     MIOpt.ReadToken();
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D119221.408341.patch
Type: text/x-patch
Size: 3637 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20220214/070ebd4d/attachment.bin>


More information about the cfe-commits mailing list