[clang] 274adcb - Implement delimited escape sequences.
Aaron Ballman via cfe-commits
cfe-commits at lists.llvm.org
Wed Sep 15 06:54:58 PDT 2021
Author: Corentin Jabot
Date: 2021-09-15T09:54:49-04:00
New Revision: 274adcb866343cb505408d8f401840ea814522c8
URL: https://github.com/llvm/llvm-project/commit/274adcb866343cb505408d8f401840ea814522c8
DIFF: https://github.com/llvm/llvm-project/commit/274adcb866343cb505408d8f401840ea814522c8.diff
LOG: Implement delimited escape sequences.
\x{XXXX}, \u{XXXX} and \o{OOOO} are accepted in all language modes
in character and string literals.
This is a feature proposed for both C++ (P2290R1) and C (N2785). The
papers have been seen by both committees but are not yet adopted into
either standard. However, they do have support from both committees.
Added:
clang/test/Lexer/char-escapes-delimited.c
Modified:
clang/include/clang/Basic/DiagnosticLexKinds.td
clang/lib/Lex/Lexer.cpp
clang/lib/Lex/LiteralSupport.cpp
clang/test/Parser/cxx11-user-defined-literals.cpp
clang/test/Preprocessor/ucn-pp-identifier.c
clang/test/Sema/ucn-identifiers.c
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 49c50fb27e624..faa84774167b9 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -127,6 +127,15 @@ def warn_utf8_symbol_zero_width : Warning<
"identifier contains Unicode character <U+%0> that is invisible in "
"some environments">, InGroup<DiagGroup<"unicode-zero-width">>;
+def ext_delimited_escape_sequence : Extension<
+ "delimited escape sequences are a Clang extension">,
+ InGroup<DiagGroup<"delimited-escape-sequence-extension">>;
+def err_delimited_escape_empty : Error<
+ "delimited escape sequence cannot be empty">;
+def err_delimited_escape_missing_brace: Error<
+ "expected '{' after '\\%0' escape sequence">;
+def err_delimited_escape_invalid : Error<
+ "invalid digit '%0' in escape sequence">;
def err_hex_escape_no_digits : Error<
"\\%0 used with no following hex digits">;
def warn_ucn_escape_no_digits : Warning<
@@ -134,6 +143,12 @@ def warn_ucn_escape_no_digits : Warning<
"treating as '\\' followed by identifier">, InGroup<Unicode>;
def err_ucn_escape_incomplete : Error<
"incomplete universal character name">;
+def warn_delimited_ucn_incomplete : Warning<
+ "incomplete delimited universal character name; "
+ "treating as '\\' 'u' '{' identifier">, InGroup<Unicode>;
+def warn_delimited_ucn_empty : Warning<
+ "empty delimited universal character name; "
+ "treating as '\\' 'u' '{' '}'">, InGroup<Unicode>;
def warn_ucn_escape_incomplete : Warning<
"incomplete universal character name; "
"treating as '\\' followed by identifier">, InGroup<Unicode>;
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 2685924392d05..55693c8f8cf46 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -3112,6 +3112,10 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
Token *Result) {
unsigned CharSize;
char Kind = getCharAndSize(StartPtr, CharSize);
+ bool Delimited = false;
+ bool FoundEndDelimiter = false;
+ unsigned Count = 0;
+ bool Diagnose = Result && !isLexingRawMode();
unsigned NumHexDigits;
if (Kind == 'u')
@@ -3122,7 +3126,7 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
return 0;
if (!LangOpts.CPlusPlus && !LangOpts.C99) {
- if (Result && !isLexingRawMode())
+ if (Diagnose)
Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
return 0;
}
@@ -3131,39 +3135,70 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
const char *KindLoc = &CurPtr[-1];
uint32_t CodePoint = 0;
- for (unsigned i = 0; i < NumHexDigits; ++i) {
+ while (Count != NumHexDigits || Delimited) {
char C = getCharAndSize(CurPtr, CharSize);
+ if (!Delimited && C == '{') {
+ Delimited = true;
+ CurPtr += CharSize;
+ continue;
+ }
+
+ if (Delimited && C == '}') {
+ CurPtr += CharSize;
+ FoundEndDelimiter = true;
+ break;
+ }
unsigned Value = llvm::hexDigitValue(C);
if (Value == -1U) {
- if (Result && !isLexingRawMode()) {
- if (i == 0) {
- Diag(BufferPtr, diag::warn_ucn_escape_no_digits)
- << StringRef(KindLoc, 1);
- } else {
- Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
-
- // If the user wrote \U1234, suggest a fixit to \u.
- if (i == 4 && NumHexDigits == 8) {
- CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1);
- Diag(KindLoc, diag::note_ucn_four_not_eight)
- << FixItHint::CreateReplacement(URange, "u");
- }
- }
- }
+ if (!Delimited)
+ break;
+ if (Diagnose)
+ Diag(BufferPtr, diag::warn_delimited_ucn_incomplete)
+ << StringRef(&C, 1);
+ return 0;
+ }
+ if (CodePoint & 0xF000'0000) {
+ if (Diagnose)
+ Diag(KindLoc, diag::err_escape_too_large) << 0;
return 0;
}
CodePoint <<= 4;
- CodePoint += Value;
-
+ CodePoint |= Value;
CurPtr += CharSize;
+ Count++;
+ }
+
+ if (Count == 0) {
+ if (Diagnose)
+ Diag(StartPtr, FoundEndDelimiter ? diag::warn_delimited_ucn_empty
+ : diag::warn_ucn_escape_no_digits)
+ << StringRef(KindLoc, 1);
+ return 0;
+ }
+
+ if (!Delimited && Count != NumHexDigits) {
+ if (Diagnose) {
+ Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
+ // If the user wrote \U1234, suggest a fixit to \u.
+ if (Count == 4 && NumHexDigits == 8) {
+ CharSourceRange URange = makeCharRange(*this, KindLoc, KindLoc + 1);
+ Diag(KindLoc, diag::note_ucn_four_not_eight)
+ << FixItHint::CreateReplacement(URange, "u");
+ }
+ }
+ return 0;
+ }
+
+ if (Delimited && PP) {
+ Diag(BufferPtr, diag::ext_delimited_escape_sequence);
}
if (Result) {
Result->setFlag(Token::HasUCN);
- if (CurPtr - StartPtr == (ptrdiff_t)NumHexDigits + 2)
+ if (CurPtr - StartPtr == (ptrdiff_t)(Count + 2 + (Delimited ? 2 : 0)))
StartPtr = CurPtr;
else
while (StartPtr != CurPtr)
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index f012fb72580ed..78e3e7d79a246 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -95,6 +95,8 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
DiagnosticsEngine *Diags,
const LangOptions &Features) {
const char *EscapeBegin = ThisTokBuf;
+ bool Delimited = false;
+ bool EndDelimiterFound = false;
// Skip the '\' char.
++ThisTokBuf;
@@ -143,26 +145,47 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
break;
case 'x': { // Hex escape.
ResultChar = 0;
- if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
+ if (ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
+ Delimited = true;
+ ThisTokBuf++;
+ if (*ThisTokBuf == '}') {
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_delimited_escape_empty);
+ return ResultChar;
+ }
+ } else if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
if (Diags)
Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
diag::err_hex_escape_no_digits) << "x";
- HadError = true;
- break;
+ return ResultChar;
}
// Hex escapes are a maximal series of hex digits.
bool Overflow = false;
for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
- int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
- if (CharVal == -1) break;
+ if (Delimited && *ThisTokBuf == '}') {
+ ThisTokBuf++;
+ EndDelimiterFound = true;
+ break;
+ }
+ int CharVal = llvm::hexDigitValue(*ThisTokBuf);
+ if (CharVal == -1) {
+ // Non delimited hex escape sequences stop at the first non-hex digit.
+ if (!Delimited)
+ break;
+ HadError = true;
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_delimited_escape_invalid)
+ << StringRef(ThisTokBuf, 1);
+ continue;
+ }
// About to shift out a digit?
if (ResultChar & 0xF0000000)
Overflow = true;
ResultChar <<= 4;
ResultChar |= CharVal;
}
-
// See if any bits will be truncated when evaluated as a character.
if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
Overflow = true;
@@ -170,9 +193,13 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
}
// Check for overflow.
- if (Overflow && Diags) // Too many digits to fit in
- Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
- diag::err_escape_too_large) << 0;
+ if (!HadError && Overflow) { // Too many digits to fit in
+ HadError = true;
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_escape_too_large)
+ << 0;
+ }
break;
}
case '0': case '1': case '2': case '3':
@@ -200,7 +227,58 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
}
break;
}
+ case 'o': {
+ bool Overflow = false;
+ if (ThisTokBuf == ThisTokEnd || *ThisTokBuf != '{') {
+ HadError = true;
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_delimited_escape_missing_brace);
+
+ break;
+ }
+ ResultChar = 0;
+ Delimited = true;
+ ++ThisTokBuf;
+ if (*ThisTokBuf == '}') {
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_delimited_escape_empty);
+ return ResultChar;
+ }
+
+ while (ThisTokBuf != ThisTokEnd) {
+ if (*ThisTokBuf == '}') {
+ EndDelimiterFound = true;
+ ThisTokBuf++;
+ break;
+ }
+ if (*ThisTokBuf < '0' || *ThisTokBuf > '7') {
+ HadError = true;
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_delimited_escape_invalid)
+ << StringRef(ThisTokBuf, 1);
+ ThisTokBuf++;
+ continue;
+ }
+ if (ResultChar & 0x020000000)
+ Overflow = true;
+ ResultChar <<= 3;
+ ResultChar |= *ThisTokBuf++ - '0';
+ }
+ // Check for overflow. Reject '\777', but not L'\777'.
+ if (!HadError &&
+ (Overflow || (CharWidth != 32 && (ResultChar >> CharWidth) != 0))) {
+ HadError = true;
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_escape_too_large)
+ << 1;
+ ResultChar &= ~0U >> (32 - CharWidth);
+ }
+ break;
+ }
// Otherwise, these are not valid escapes.
case '(': case '{': case '[': case '%':
// GCC accepts these as extensions. We warn about them as such though.
@@ -224,6 +302,17 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin,
break;
}
+ if (Delimited && Diags) {
+ if (!EndDelimiterFound)
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::err_expected)
+ << tok::r_brace;
+ else if (!HadError) {
+ Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+ diag::ext_delimited_escape_sequence);
+ }
+ }
+
return ResultChar;
}
@@ -245,18 +334,32 @@ void clang::expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
}
++I;
- assert(*I == 'u' || *I == 'U');
+ char Kind = *I;
+ ++I;
+
+ assert(Kind == 'u' || Kind == 'U');
+ uint32_t CodePoint = 0;
+
+ if (Kind == 'u' && *I == '{') {
+ for (++I; *I != '}'; ++I) {
+ unsigned Value = llvm::hexDigitValue(*I);
+ assert(Value != -1U);
+ CodePoint <<= 4;
+ CodePoint += Value;
+ }
+ appendCodePoint(CodePoint, Buf);
+ continue;
+ }
unsigned NumHexDigits;
- if (*I == 'u')
+ if (Kind == 'u')
NumHexDigits = 4;
else
NumHexDigits = 8;
assert(I + NumHexDigits <= E);
- uint32_t CodePoint = 0;
- for (++I; NumHexDigits != 0; ++I, --NumHexDigits) {
+ for (; NumHexDigits != 0; ++I, --NumHexDigits) {
unsigned Value = llvm::hexDigitValue(*I);
assert(Value != -1U);
@@ -282,28 +385,82 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
// Skip the '\u' char's.
ThisTokBuf += 2;
- if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
+ bool Delimited = false;
+ bool EndDelimiterFound = false;
+ bool HasError = false;
+
+ if (UcnBegin[1] == 'u' && in_char_string_literal &&
+ ThisTokBuf != ThisTokEnd && *ThisTokBuf == '{') {
+ Delimited = true;
+ ThisTokBuf++;
+ } else if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
if (Diags)
Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
diag::err_hex_escape_no_digits) << StringRef(&ThisTokBuf[-1], 1);
return false;
}
UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
- unsigned short UcnLenSave = UcnLen;
- for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) {
- int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
- if (CharVal == -1) break;
+
+ bool Overflow = false;
+ unsigned short Count = 0;
+ for (; ThisTokBuf != ThisTokEnd && (Delimited || Count != UcnLen);
+ ++ThisTokBuf) {
+ if (Delimited && *ThisTokBuf == '}') {
+ ++ThisTokBuf;
+ EndDelimiterFound = true;
+ break;
+ }
+ int CharVal = llvm::hexDigitValue(*ThisTokBuf);
+ if (CharVal == -1) {
+ HasError = true;
+ if (!Delimited)
+ break;
+ if (Diags) {
+ Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+ diag::err_delimited_escape_invalid)
+ << StringRef(ThisTokBuf, 1);
+ }
+ Count++;
+ continue;
+ }
+ if (UcnVal & 0xF0000000) {
+ Overflow = true;
+ continue;
+ }
UcnVal <<= 4;
UcnVal |= CharVal;
+ Count++;
}
+
+ if (Overflow) {
+ if (Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+ diag::err_escape_too_large)
+ << 0;
+ return false;
+ }
+
+ if (Delimited && !EndDelimiterFound) {
+ if (Diags) {
+ Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+ diag::err_expected)
+ << tok::r_brace;
+ }
+ return false;
+ }
+
// If we didn't consume the proper number of digits, there is a problem.
- if (UcnLenSave) {
+ if (Count == 0 || (!Delimited && Count != UcnLen)) {
if (Diags)
Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
- diag::err_ucn_escape_incomplete);
+ Delimited ? diag::err_delimited_escape_empty
+ : diag::err_ucn_escape_incomplete);
return false;
}
+ if (HasError)
+ return false;
+
// Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2]
if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) || // surrogate codepoints
UcnVal > 0x10FFFF) { // maximum legal UTF32 value
@@ -338,6 +495,10 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
diag::warn_ucn_not_valid_in_c89_literal);
+ if (Delimited && Diags)
+ Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
+ diag::ext_delimited_escape_sequence);
+
return true;
}
diff --git a/clang/test/Lexer/char-escapes-delimited.c b/clang/test/Lexer/char-escapes-delimited.c
new file mode 100644
index 0000000000000..ad413d1843b06
--- /dev/null
+++ b/clang/test/Lexer/char-escapes-delimited.c
@@ -0,0 +1,81 @@
+// RUN: %clang_cc1 -fsyntax-only -pedantic -verify %s
+// RUN: %clang_cc1 -x c -fsyntax-only -pedantic -verify %s
+// RUN: %clang_cc1 -fwchar-type=short -fno-signed-wchar -fsyntax-only -pedantic -verify %s
+// RUN: %clang_cc1 -x c -fwchar-type=short -fno-signed-wchar -fsyntax-only -pedantic -verify %s
+
+const char *errors =
+ "\u{}" //expected-error {{delimited escape sequence cannot be empty}}
+ "\u{" //expected-error {{expected '}'}}
+ "\u{h}" //expected-error {{invalid digit 'h' in escape sequence}}
+ "\x{}" //expected-error {{delimited escape sequence cannot be empty}}
+ "\x{" //expected-error {{expected '}'}}
+ "\x{h}" //expected-error {{invalid digit 'h' in escape sequence}}
+ "\o{}" //expected-error {{delimited escape sequence cannot be empty}}
+ "\o{" //expected-error {{expected '}'}}
+ "\o{8}" //expected-error {{invalid digit '8' in escape sequence}}
+ ;
+
+void ucn() {
+ char a = '\u{1234}'; // expected-error {{character too large for enclosing character literal type}}
+ // expected-warning@-1 {{delimited escape sequences are a Clang extension}}
+
+ unsigned b = U'\u{1234}'; // expected-warning {{extension}}
+
+#ifdef __cplusplus
+ unsigned b2 = U'\u{1}'; // expected-warning {{extension}}
+#else
+ unsigned b2 = U'\u{1}'; //expected-error {{universal character name refers to a control character}}
+#endif
+
+ unsigned c = U'\u{000000000001234}'; // expected-warning {{extension}}
+ unsigned d = U'\u{111111111}'; //expected-error {{hex escape sequence out of range}}
+}
+
+void hex() {
+ char a = '\x{1}'; // expected-warning {{extension}}
+ char b = '\x{abcdegggggabc}'; // expected-error 5{{invalid digit 'g' in escape sequence}}
+ char c = '\x{ff1}'; // expected-error {{hex escape sequence out of range}}
+
+#if __WCHAR_MAX__ > 0xFFFF
+ unsigned d = L'\x{FFFFFFFF}'; // expected-warning {{extension}}
+ unsigned e = L'\x{100000000}'; // expected-error {{hex escape sequence out of range}}
+#else
+ unsigned f = L'\x{FFFF}'; // expected-warning {{extension}}
+ unsigned g = L'\x{10000}'; // expected-error {{hex escape sequence out of range}}
+#endif
+ unsigned h = U'\x{FFFFFFFF}'; // expected-warning {{extension}}
+ unsigned i = U'\x{100000000}'; // expected-error {{hex escape sequence out of range}}
+}
+
+void octal() {
+ char a = '\o{1}'; // expected-warning {{extension}}
+ char b = '\o{12345678881238}'; // expected-error 4{{invalid digit '8' in escape sequence}}
+ char c = '\o{777}'; // //expected-error {{octal escape sequence out of range}}
+#if __WCHAR_MAX__ > 0xFFFF
+ unsigned d = L'\o{37777777777}'; // expected-warning {{extension}}
+ unsigned e = L'\o{40000000000}'; // expected-error {{octal escape sequence out of range}}
+#else
+ unsigned d = L'\o{177777}'; // expected-warning {{extension}}
+ unsigned e = L'\o{200000}'; // expected-error {{octal escape sequence out of range}}
+#endif
+}
+
+void concat() {
+ (void)"\x{" "12}"; // expected-error {{expected '}'}}
+ (void)"\u{" "12}"; // expected-error {{expected '}'}}
+ (void)"\o{" "12}"; // expected-error {{expected '}'}}
+
+ (void)"\x{12" "}"; // expected-error {{expected '}'}}
+ (void)"\u{12" "}"; // expected-error {{expected '}'}}
+ (void)"\o{12" "}"; // expected-error {{expected '}'}}
+}
+
+void separators() {
+ (void)"\x{12'3}"; // expected-error {{invalid digit ''' in escape sequence}}
+ (void)"\u{12'3}"; // expected-error {{invalid digit ''' in escape sequence}}
+ (void)"\o{12'3}"; // expected-error {{invalid digit ''' in escape sequence}}
+
+ '\x{12'3'}'; // expected-error {{expected '}'}}
+ // expected-error@-1 2{{expected ';'}}
+ // expected-warning@-2 3{{expression result unused}}
+}
diff --git a/clang/test/Parser/cxx11-user-defined-literals.cpp b/clang/test/Parser/cxx11-user-defined-literals.cpp
index 714be62381c07..143cdb502c041 100644
--- a/clang/test/Parser/cxx11-user-defined-literals.cpp
+++ b/clang/test/Parser/cxx11-user-defined-literals.cpp
@@ -129,6 +129,9 @@ int operator""_\U00010000(char) {} // expected-error {{redefinition of 'operator
int operator""_℮""_\u212e""_\U0000212e""(const char*, size_t);
int operator""_\u212e""_\U0000212e""_℮""(const char*, size_t);
int operator""_\U0000212e""_℮""_\u212e""(const char*, size_t);
+
+int operator""_\u{212f}(char);
+
int mix_ucn_utf8 = ""_℮""_\u212e""_\U0000212e"";
void operator""_℮""_ℯ(unsigned long long) {} // expected-error {{differing user-defined suffixes ('_℮' and '_ℯ') in string literal concatenation}}
diff --git a/clang/test/Preprocessor/ucn-pp-identifier.c b/clang/test/Preprocessor/ucn-pp-identifier.c
index 0929412a76ed2..1d91ad422b697 100644
--- a/clang/test/Preprocessor/ucn-pp-identifier.c
+++ b/clang/test/Preprocessor/ucn-pp-identifier.c
@@ -16,6 +16,10 @@
#error "This should never happen"
#endif
+#if a\u{FD}() //expected-warning {{Clang extension}}
+#error "This should never happen"
+#endif
+
#if \uarecool // expected-warning{{incomplete universal character name; treating as '\' followed by identifier}} expected-error {{invalid token at start of a preprocessor expression}}
#endif
#if \uwerecool // expected-warning{{\u used with no following hex digits; treating as '\' followed by identifier}} expected-error {{invalid token at start of a preprocessor expression}}
@@ -27,6 +31,7 @@
#define \ufffe // expected-error {{macro name must be an identifier}}
#define \U10000000 // expected-error {{macro name must be an identifier}}
#define \u0061 // expected-error {{character 'a' cannot be specified by a universal character name}} expected-error {{macro name must be an identifier}}
+#define \u{fffe} // expected-error {{macro name must be an identifier}} expected-warning {{Clang extension}}
#define a\u0024
@@ -103,3 +108,8 @@ C 1
// CHECK-NEXT: #define capital_u_\U00FC
// CHECK-NEXT: {{^ \^}}
// CHECK-NEXT: {{^ u}}
+
+#define \u{} // expected-warning {{empty delimited universal character name; treating as '\' 'u' '{' '}'}} expected-error {{macro name must be an identifier}}
+#define \u{123456789} // expected-error {{hex escape sequence out of range}} expected-error {{macro name must be an identifier}}
+#define \u{ // expected-warning {{incomplete delimited universal character name; treating as '\' 'u' '{' identifier}} expected-error {{macro name must be an identifier}}
+#define \u{fgh} // expected-warning {{incomplete delimited universal character name; treating as '\' 'u' '{' identifier}} expected-error {{macro name must be an identifier}}
diff --git a/clang/test/Sema/ucn-identifiers.c b/clang/test/Sema/ucn-identifiers.c
index 6b2636587af66..ebf2dc0982b97 100644
--- a/clang/test/Sema/ucn-identifiers.c
+++ b/clang/test/Sema/ucn-identifiers.c
@@ -17,6 +17,7 @@ void goodCalls() {
\u00fcber(1);
über(2);
\U000000FCber(3);
+ \u{FC}ber(4); // expected-warning {{Clang extension}}
}
void badCalls() {
@@ -24,7 +25,7 @@ void badCalls() {
\u00fcber = 0; // expected-error{{non-object type 'void (int)' is not assignable}}
über(1, 2);
- \U000000FCber();
+ \U000000FCber();
#ifdef __cplusplus
// expected-error@-3 {{no matching function}}
// expected-error@-3 {{no matching function}}
More information about the cfe-commits
mailing list