[cfe-commits] r152444 - in /cfe/trunk: include/clang/Basic/DiagnosticLexKinds.td lib/Lex/LiteralSupport.cpp test/CXX/lex/lex.charset/p2-cxx11.cpp test/CXX/lex/lex.charset/p2-cxx98.cpp test/Sema/ucn-cstring.c test/SemaCXX/cxx98-compat.cpp www/cxx_status.html
Richard Smith
richard-llvm at metafoo.co.uk
Fri Mar 9 14:27:52 PST 2012
Author: rsmith
Date: Fri Mar 9 16:27:51 2012
New Revision: 152444
URL: http://llvm.org/viewvc/llvm-project?rev=152444&view=rev
Log:
Improve diagnostics for UCNs referring to control characters and members of the
basic source character set in C++98. Add -Wc++98-compat diagnostics for the same
in literals in C++11. Extend such support to cover string literals as well as
character literals, and mark N2170 as done.
This seems too minor to warrant a release note to me. Let me know if you disagree.
Added:
cfe/trunk/test/CXX/lex/lex.charset/p2-cxx11.cpp
cfe/trunk/test/CXX/lex/lex.charset/p2-cxx98.cpp
Modified:
cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td
cfe/trunk/lib/Lex/LiteralSupport.cpp
cfe/trunk/test/Sema/ucn-cstring.c
cfe/trunk/test/SemaCXX/cxx98-compat.cpp
cfe/trunk/www/cxx_status.html
Modified: cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td?rev=152444&r1=152443&r2=152444&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td Fri Mar 9 16:27:51 2012
@@ -95,6 +95,16 @@
def err_ucn_escape_no_digits : Error<"\\u used with no following hex digits">;
def err_ucn_escape_invalid : Error<"invalid universal character">;
def err_ucn_escape_incomplete : Error<"incomplete universal character name">;
+def err_ucn_escape_basic_scs : Error<
+ "character '%0' cannot be specified by a universal character name">;
+def err_ucn_control_character : Error<
+ "universal character name refers to a control character">;
+def warn_cxx98_compat_literal_ucn_escape_basic_scs : Warning<
+ "specifying character '%0' with a universal character name "
+ "is incompatible with C++98">, InGroup<CXX98Compat>, DefaultIgnore;
+def warn_cxx98_compat_literal_ucn_control_character : Warning<
+ "universal character name referring to a control character "
+ "is incompatible with C++98">, InGroup<CXX98Compat>, DefaultIgnore;
def err_invalid_decimal_digit : Error<"invalid digit '%0' in decimal constant">;
def err_invalid_binary_digit : Error<"invalid digit '%0' in binary constant">;
def err_invalid_octal_digit : Error<"invalid digit '%0' in octal constant">;
Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=152444&r1=152443&r2=152444&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/LiteralSupport.cpp (original)
+++ cfe/trunk/lib/Lex/LiteralSupport.cpp Fri Mar 9 16:27:51 2012
@@ -179,7 +179,8 @@
/// ProcessUCNEscape - Read the Universal Character Name, check constraints and
/// return the UTF32.
-static bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
+static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
+ const char *ThisTokEnd,
uint32_t &UcnVal, unsigned short &UcnLen,
FullSourceLoc Loc, DiagnosticsEngine *Diags,
const LangOptions &Features,
@@ -187,8 +188,7 @@
if (!Features.CPlusPlus && !Features.C99 && Diags)
Diags->Report(Loc, diag::warn_ucn_not_valid_in_c89);
- // Save the beginning of the string (for error diagnostics).
- const char *ThisTokBegin = ThisTokBuf;
+ const char *UcnBegin = ThisTokBuf;
// Skip the '\u' char's.
ThisTokBuf += 2;
@@ -210,31 +210,43 @@
if (UcnLenSave) {
if (Diags) {
SourceLocation L =
- Lexer::AdvanceToTokenCharacter(Loc, ThisTokBuf-ThisTokBegin,
+ Lexer::AdvanceToTokenCharacter(Loc, UcnBegin - ThisTokBegin,
Loc.getManager(), Features);
- Diags->Report(FullSourceLoc(L, Loc.getManager()),
- diag::err_ucn_escape_incomplete);
+ Diags->Report(L, diag::err_ucn_escape_incomplete);
}
return false;
}
+
// Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2]
- bool invalid_ucn = (0xD800<=UcnVal && UcnVal<=0xDFFF) // surrogate codepoints
- || 0x10FFFF < UcnVal; // maximum legal UTF32 value
+ if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) || // surrogate codepoints
+ UcnVal > 0x10FFFF) { // maximum legal UTF32 value
+ if (Diags)
+ Diags->Report(Loc, diag::err_ucn_escape_invalid);
+ return false;
+ }
// C++11 allows UCNs that refer to control characters and basic source
// characters inside character and string literals
- if (!Features.CPlusPlus0x || !in_char_string_literal) {
- if ((UcnVal < 0xa0 &&
- (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60 ))) { // $, @, `
- invalid_ucn = true;
+ if (UcnVal < 0xa0 &&
+ (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) { // $, @, `
+ bool IsError = (!Features.CPlusPlus0x || !in_char_string_literal);
+ if (Diags) {
+ SourceLocation UcnBeginLoc =
+ Lexer::AdvanceToTokenCharacter(Loc, UcnBegin - ThisTokBegin,
+ Loc.getManager(), Features);
+ char BasicSCSChar = UcnVal;
+ if (UcnVal >= 0x20 && UcnVal < 0x7f)
+ Diags->Report(UcnBeginLoc, IsError ? diag::err_ucn_escape_basic_scs :
+ diag::warn_cxx98_compat_literal_ucn_escape_basic_scs)
+ << StringRef(&BasicSCSChar, 1);
+ else
+ Diags->Report(UcnBeginLoc, IsError ? diag::err_ucn_control_character :
+ diag::warn_cxx98_compat_literal_ucn_control_character);
}
+ if (IsError)
+ return false;
}
- if (invalid_ucn) {
- if (Diags)
- Diags->Report(Loc, diag::err_ucn_escape_invalid);
- return false;
- }
return true;
}
@@ -242,7 +254,8 @@
/// convert the UTF32 to UTF8 or UTF16. This is a subroutine of
/// StringLiteralParser. When we decide to implement UCN's for identifiers,
/// we will likely rework our support for UCN's.
-static void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
+static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
+ const char *ThisTokEnd,
char *&ResultBuf, bool &HadError,
FullSourceLoc Loc, unsigned CharByteWidth,
DiagnosticsEngine *Diags,
@@ -250,8 +263,8 @@
typedef uint32_t UTF32;
UTF32 UcnVal = 0;
unsigned short UcnLen = 0;
- if (!ProcessUCNEscape(ThisTokBuf, ThisTokEnd, UcnVal, UcnLen, Loc, Diags,
- Features)) {
+ if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, UcnLen,
+ Loc, Diags, Features, true)) {
HadError = 1;
return;
}
@@ -787,6 +800,8 @@
Kind = kind;
+ const char *TokBegin = begin;
+
// Skip over wide character determinant.
if (Kind != tok::char_constant) {
++begin;
@@ -803,7 +818,7 @@
--end;
} while (end[-1] != '\'');
UDSuffixBuf.assign(end, UDSuffixEnd);
- UDSuffixOffset = end - begin + 1;
+ UDSuffixOffset = end - TokBegin;
}
// Trim the ending quote.
@@ -885,7 +900,7 @@
// Is this a Universal Character Name excape?
if (begin[1] == 'u' || begin[1] == 'U') {
unsigned short UcnLen = 0;
- if (!ProcessUCNEscape(begin, end, *buffer_begin, UcnLen,
+ if (!ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen,
FullSourceLoc(Loc, PP.getSourceManager()),
&PP.getDiagnostics(), PP.getLangOptions(),
true))
@@ -1113,6 +1128,7 @@
continue;
}
+ const char *ThisTokBegin = ThisTokBuf;
const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
// Remove an optional ud-suffix.
@@ -1208,8 +1224,9 @@
}
// Is this a Universal Character Name escape?
if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
- EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
- hadError, FullSourceLoc(StringToks[i].getLocation(),SM),
+ EncodeUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd,
+ ResultPtr, hadError,
+ FullSourceLoc(StringToks[i].getLocation(), SM),
CharByteWidth, Diags, Features);
continue;
}
Added: cfe/trunk/test/CXX/lex/lex.charset/p2-cxx11.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CXX/lex/lex.charset/p2-cxx11.cpp?rev=152444&view=auto
==============================================================================
--- cfe/trunk/test/CXX/lex/lex.charset/p2-cxx11.cpp (added)
+++ cfe/trunk/test/CXX/lex/lex.charset/p2-cxx11.cpp Fri Mar 9 16:27:51 2012
@@ -0,0 +1,42 @@
+// RUN: %clang_cc1 -verify -std=c++11 %s
+
+char c00 = '\u0000'; // ok
+char c01 = '\u0001'; // ok
+char c1f = '\u001f'; // ok
+char c20 = '\u0020'; // ' ', ok
+char c22 = '\u0022'; // ", ok
+char c23 = '\u0023'; // #, ok
+char c24 = '\u0024'; // $, ok
+char c25 = '\u0025'; // %, ok
+char c27 = '\u0027'; // ', ok
+char c3f = '\u003f'; // ?, ok
+char c40 = '\u0040'; // @, ok
+char c41 = '\u0041'; // A, ok
+char c5f = '\u005f'; // _, ok
+char c60 = '\u0060'; // `, ok
+char c7e = '\u007e'; // ~, ok
+char c7f = '\u007f'; // ok
+
+wchar_t w007f = L'\u007f';
+wchar_t w0080 = L'\u0080';
+wchar_t w009f = L'\u009f';
+wchar_t w00a0 = L'\u00a0';
+
+wchar_t wd799 = L'\ud799';
+wchar_t wd800 = L'\ud800'; // expected-error {{invalid universal character}}
+wchar_t wdfff = L'\udfff'; // expected-error {{invalid universal character}}
+wchar_t we000 = L'\ue000';
+
+char32_t w10fffe = U'\U0010fffe';
+char32_t w10ffff = U'\U0010ffff';
+char32_t w110000 = U'\U00110000'; // expected-error {{invalid universal character}}
+
+const char *p1 = "\u0000\u0001\u001f\u0020\u0022\u0023\u0024\u0025\u0027\u003f\u0040\u0041\u005f\u0060\u007e\u007f";
+const wchar_t *p2 = L"\u0000\u0012\u004e\u007f\u0080\u009f\u00a0\ud799\ue000";
+const char *p3 = u8"\u0000\u0012\u004e\u007f\u0080\u009f\u00a0\ud799\ue000";
+const char16_t *p4 = u"\u0000\u0012\u004e\u007f\u0080\u009f\u00a0\ud799\ue000";
+const char32_t *p5 = U"\u0000\u0012\u004e\u007f\u0080\u009f\u00a0\ud799\ue000";
+const wchar_t *p6 = L"foo \U00110000 bar"; // expected-error {{invalid universal character}}
+const char *p7 = u8"foo \U0000d800 bar"; // expected-error {{invalid universal character}}
+const char16_t *p8 = u"foo \U0000dfff bar"; // expected-error {{invalid universal character}}
+const char32_t *p9 = U"foo \U0010ffff bar"; // ok
Added: cfe/trunk/test/CXX/lex/lex.charset/p2-cxx98.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CXX/lex/lex.charset/p2-cxx98.cpp?rev=152444&view=auto
==============================================================================
--- cfe/trunk/test/CXX/lex/lex.charset/p2-cxx98.cpp (added)
+++ cfe/trunk/test/CXX/lex/lex.charset/p2-cxx98.cpp Fri Mar 9 16:27:51 2012
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 -verify -std=c++98 %s
+
+char c00 = '\u0000'; // expected-error {{universal character name refers to a control character}}
+char c01 = '\u0001'; // expected-error {{universal character name refers to a control character}}
+char c1f = '\u001f'; // expected-error {{universal character name refers to a control character}}
+char c20 = '\u0020'; // ' ', expected-error {{character ' ' cannot be specified by a universal character name}}
+char c22 = '\u0022'; // ", expected-error {{character '"' cannot be specified by a universal character name}}
+char c23 = '\u0023'; // #, expected-error {{character '#' cannot be specified by a universal character name}}
+char c24 = '\u0024'; // $, ok
+char c25 = '\u0025'; // %, expected-error {{character '%' cannot be specified by a universal character name}}
+char c27 = '\u0027'; // ', expected-error {{character ''' cannot be specified by a universal character name}}
+char c3f = '\u003f'; // ?, expected-error {{character '?' cannot be specified by a universal character name}}
+char c40 = '\u0040'; // @, ok
+char c41 = '\u0041'; // A, expected-error {{character 'A' cannot be specified by a universal character name}}
+char c5f = '\u005f'; // _, expected-error {{character '_' cannot be specified by a universal character name}}
+char c60 = '\u0060'; // `, ok
+char c7e = '\u007e'; // ~, expected-error {{character '~' cannot be specified by a universal character name}}
+char c7f = '\u007f'; // expected-error {{universal character name refers to a control character}}
+
+wchar_t w007f = L'\u007f'; // expected-error {{universal character name refers to a control character}}
+wchar_t w0080 = L'\u0080'; // expected-error {{universal character name refers to a control character}}
+wchar_t w009f = L'\u009f'; // expected-error {{universal character name refers to a control character}}
+wchar_t w00a0 = L'\u00a0';
+
+wchar_t wd799 = L'\ud799';
+wchar_t wd800 = L'\ud800'; // expected-error {{invalid universal character}}
+wchar_t wdfff = L'\udfff'; // expected-error {{invalid universal character}}
+wchar_t we000 = L'\ue000';
+
+const char *s00 = "\u0000"; // expected-error {{universal character name refers to a control character}}
+const char *s01 = "\u0001"; // expected-error {{universal character name refers to a control character}}
+const char *s1f = "\u001f"; // expected-error {{universal character name refers to a control character}}
+const char *s20 = "\u0020"; // ' ', expected-error {{character ' ' cannot be specified by a universal character name}}
+const char *s22 = "\u0022"; // ", expected-error {{character '"' cannot be specified by a universal character name}}
+const char *s23 = "\u0023"; // #, expected-error {{character '#' cannot be specified by a universal character name}}
+const char *s24 = "\u0024"; // $, ok
+const char *s25 = "\u0025"; // %, expected-error {{character '%' cannot be specified by a universal character name}}
+const char *s27 = "\u0027"; // ', expected-error {{character ''' cannot be specified by a universal character name}}
+const char *s3f = "\u003f"; // ?, expected-error {{character '?' cannot be specified by a universal character name}}
+const char *s40 = "\u0040"; // @, ok
+const char *s41 = "\u0041"; // A, expected-error {{character 'A' cannot be specified by a universal character name}}
+const char *s5f = "\u005f"; // _, expected-error {{character '_' cannot be specified by a universal character name}}
+const char *s60 = "\u0060"; // `, ok
+const char *s7e = "\u007e"; // ~, expected-error {{character '~' cannot be specified by a universal character name}}
+const char *s7f = "\u007f"; // expected-error {{universal character name refers to a control character}}
+
+const wchar_t *ws007f = L"\u007f"; // expected-error {{universal character name refers to a control character}}
+const wchar_t *ws0080 = L"\u0080"; // expected-error {{universal character name refers to a control character}}
+const wchar_t *ws009f = L"\u009f"; // expected-error {{universal character name refers to a control character}}
+const wchar_t *ws00a0 = L"\u00a0";
+
+const wchar_t *wsd799 = L"\ud799";
+const wchar_t *wsd800 = L"\ud800"; // expected-error {{invalid universal character}}
+const wchar_t *wsdfff = L"\udfff"; // expected-error {{invalid universal character}}
+const wchar_t *wse000 = L"\ue000";
Modified: cfe/trunk/test/Sema/ucn-cstring.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/ucn-cstring.c?rev=152444&r1=152443&r2=152444&view=diff
==============================================================================
--- cfe/trunk/test/Sema/ucn-cstring.c (original)
+++ cfe/trunk/test/Sema/ucn-cstring.c Fri Mar 9 16:27:51 2012
@@ -11,7 +11,6 @@
printf("%s\n", "\U"); // expected-error{{\u used with no following hex digits}}
printf("%s\n", "\U00"); // expected-error{{incomplete universal character name}}
printf("%s\n", "\U0001"); // expected-error{{incomplete universal character name}}
- printf("%s\n", "\u0001"); // expected-error{{invalid universal character}}
+ printf("%s\n", "\u0001"); // expected-error{{universal character name refers to a control character}}
return 0;
}
-
Modified: cfe/trunk/test/SemaCXX/cxx98-compat.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCXX/cxx98-compat.cpp?rev=152444&r1=152443&r2=152444&view=diff
==============================================================================
--- cfe/trunk/test/SemaCXX/cxx98-compat.cpp (original)
+++ cfe/trunk/test/SemaCXX/cxx98-compat.cpp Fri Mar 9 16:27:51 2012
@@ -281,3 +281,10 @@
int k = sizeof(S::n); // expected-warning {{use of non-static data member 'n' in an unevaluated context is incompatible with C++98}}
const std::type_info &ti = typeid(S::n); // expected-warning {{use of non-static data member 'n' in an unevaluated context is incompatible with C++98}}
}
+
+namespace LiteralUCNs {
+ char c1 = '\u001e'; // expected-warning {{universal character name referring to a control character is incompatible with C++98}}
+ wchar_t c2 = L'\u0041'; // expected-warning {{specifying character 'A' with a universal character name is incompatible with C++98}}
+ const char *s1 = "foo\u0031"; // expected-warning {{specifying character '1' with a universal character name is incompatible with C++98}}
+ const wchar_t *s2 = L"bar\u0085"; // expected-warning {{universal character name referring to a control character is incompatible with C++98}}
+}
Modified: cfe/trunk/www/cxx_status.html
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/www/cxx_status.html?rev=152444&r1=152443&r2=152444&view=diff
==============================================================================
--- cfe/trunk/www/cxx_status.html (original)
+++ cfe/trunk/www/cxx_status.html Fri Mar 9 16:27:51 2012
@@ -206,9 +206,9 @@
<td class="full" align="center">Clang 3.0</td>
</tr>
<tr>
- <td>Universal character name literals</td>
+ <td>Universal character names in literals</td>
<td><a href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2170.html">N2170</a></td>
- <td class="none" align="center">No</td>
+ <td class="svn" align="center">SVN</td>
</tr>
<tr>
<td>User-defined literals</td>
More information about the cfe-commits mailing list