[cfe-commits] r152012 - in /cfe/trunk: include/clang/Basic/DiagnosticLexKinds.td include/clang/Lex/Lexer.h include/clang/Lex/LiteralSupport.h lib/Lex/Lexer.cpp lib/Lex/LiteralSupport.cpp test/CXX/lex/lex.literal/lex.ext/p1.cpp test/CXX/over/over.oper/over.literal/p5.cpp test/CXX/over/over.oper/over.literal/p8.cpp test/SemaCXX/cxx98-compat.cpp
Richard Smith
richard-llvm at metafoo.co.uk
Sun Mar 4 20:02:16 PST 2012
Author: rsmith
Date: Sun Mar 4 22:02:15 2012
New Revision: 152012
URL: http://llvm.org/viewvc/llvm-project?rev=152012&view=rev
Log:
Lexing support for user-defined literals. Currently these lex as the same token
kinds as the underlying string literals, and we silently drop the ud-suffix;
those issues will be fixed by subsequent patches.
Modified:
cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td
cfe/trunk/include/clang/Lex/Lexer.h
cfe/trunk/include/clang/Lex/LiteralSupport.h
cfe/trunk/lib/Lex/Lexer.cpp
cfe/trunk/lib/Lex/LiteralSupport.cpp
cfe/trunk/test/CXX/lex/lex.literal/lex.ext/p1.cpp
cfe/trunk/test/CXX/over/over.oper/over.literal/p5.cpp
cfe/trunk/test/CXX/over/over.oper/over.literal/p8.cpp
cfe/trunk/test/SemaCXX/cxx98-compat.cpp
Modified: cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td?rev=152012&r1=152011&r2=152012&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticLexKinds.td Sun Mar 4 22:02:15 2012
@@ -133,6 +133,9 @@
InGroup<CXX98Compat>, DefaultIgnore;
def err_unsupported_string_concat : Error<
"unsupported non-standard concatenation of string literals">;
+def err_string_concat_mixed_suffix : Error<
+ "differing user-defined suffixes ('%0' and '%1') in string literal "
+ "concatenation">;
def err_bad_string_encoding : Error<
"illegal character encoding in string literal">;
def warn_bad_string_encoding : ExtWarn<
Modified: cfe/trunk/include/clang/Lex/Lexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/Lexer.h?rev=152012&r1=152011&r2=152012&view=diff
==============================================================================
--- cfe/trunk/include/clang/Lex/Lexer.h (original)
+++ cfe/trunk/include/clang/Lex/Lexer.h Sun Mar 4 22:02:15 2012
@@ -530,6 +530,8 @@
// Other lexer functions.
void SkipBytes(unsigned Bytes, bool StartOfLine);
+
+ const char *LexUDSuffix(Token &Result, const char *CurPtr);
// Helper functions to lex the remainder of a token of the specific type.
void LexIdentifier (Token &Result, const char *CurPtr);
Modified: cfe/trunk/include/clang/Lex/LiteralSupport.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/LiteralSupport.h?rev=152012&r1=152011&r2=152012&view=diff
==============================================================================
--- cfe/trunk/include/clang/Lex/LiteralSupport.h (original)
+++ cfe/trunk/include/clang/Lex/LiteralSupport.h Sun Mar 4 22:02:15 2012
@@ -128,6 +128,7 @@
tok::TokenKind Kind;
bool IsMultiChar;
bool HadError;
+ SmallString<32> UDSuffixBuf;
public:
CharLiteralParser(const char *begin, const char *end,
SourceLocation Loc, Preprocessor &PP,
@@ -140,6 +141,7 @@
bool isUTF32() const { return Kind == tok::utf32_char_constant; }
bool isMultiChar() const { return IsMultiChar; }
uint64_t getValue() const { return Value; }
+ StringRef getUDSuffix() const { return UDSuffixBuf; }
};
/// StringLiteralParser - This decodes string escape characters and performs
@@ -157,6 +159,7 @@
tok::TokenKind Kind;
SmallString<512> ResultBuf;
char *ResultPtr; // cursor
+ SmallString<32> UDSuffixBuf;
public:
StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
Preprocessor &PP, bool Complain = true);
@@ -196,6 +199,8 @@
bool isUTF32() const { return Kind == tok::utf32_string_literal; }
bool isPascal() const { return Pascal; }
+ StringRef getUDSuffix() const { return UDSuffixBuf; }
+
private:
void init(const Token *StringToks, unsigned NumStringToks);
bool CopyStringFragment(StringRef Fragment);
Modified: cfe/trunk/lib/Lex/Lexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Lexer.cpp?rev=152012&r1=152011&r2=152012&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/Lexer.cpp (original)
+++ cfe/trunk/lib/Lex/Lexer.cpp Sun Mar 4 22:02:15 2012
@@ -1078,6 +1078,12 @@
}
+/// isIdentifierHead - Return true if this is the first character of an
+/// identifier, which is [a-zA-Z_].
+static inline bool isIdentifierHead(unsigned char c) {
+ return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false;
+}
+
/// isIdentifierBody - Return true if this is the body character of an
/// identifier, which is [a-zA-Z0-9_].
static inline bool isIdentifierBody(unsigned char c) {
@@ -1543,7 +1549,7 @@
unsigned Size;
char C = getCharAndSize(CurPtr, Size);
char PrevCh = 0;
- while (isNumberBody(C)) { // FIXME: UCNs?
+ while (isNumberBody(C)) { // FIXME: UCNs.
CurPtr = ConsumeChar(CurPtr, Size, Result);
PrevCh = C;
C = getCharAndSize(CurPtr, Size);
@@ -1567,6 +1573,23 @@
Result.setLiteralData(TokStart);
}
+/// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes
+/// in C++11.
+const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) {
+ assert(getFeatures().CPlusPlus0x && "ud-suffix only exists in C++11");
+
+ // Maximally munch an identifier. FIXME: UCNs.
+ unsigned Size;
+ char C = getCharAndSize(CurPtr, Size);
+ if (isIdentifierHead(C)) {
+ do {
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
+ C = getCharAndSize(CurPtr, Size);
+ } while (isIdentifierBody(C));
+ }
+ return CurPtr;
+}
+
/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
/// either " or L" or u8" or u" or U".
void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
@@ -1606,6 +1629,10 @@
C = getAndAdvanceChar(CurPtr, Result);
}
+ // If we are in C++11, lex the optional ud-suffix.
+ if (getFeatures().CPlusPlus0x)
+ CurPtr = LexUDSuffix(Result, CurPtr);
+
// If a nul character existed in the string, warn about it.
if (NulCharacter && !isLexingRawMode())
Diag(NulCharacter, diag::null_in_string);
@@ -1685,6 +1712,10 @@
}
}
+ // If we are in C++11, lex the optional ud-suffix.
+ if (getFeatures().CPlusPlus0x)
+ CurPtr = LexUDSuffix(Result, CurPtr);
+
// Update the location of token as well as BufferPtr.
const char *TokStart = BufferPtr;
FormTokenWithChars(Result, CurPtr, Kind);
@@ -1768,6 +1799,10 @@
C = getAndAdvanceChar(CurPtr, Result);
}
+ // If we are in C++11, lex the optional ud-suffix.
+ if (getFeatures().CPlusPlus0x)
+ CurPtr = LexUDSuffix(Result, CurPtr);
+
// If a nul character existed in the character, warn about it.
if (NulCharacter && !isLexingRawMode())
Diag(NulCharacter, diag::null_in_char);
Modified: cfe/trunk/lib/Lex/LiteralSupport.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/LiteralSupport.cpp?rev=152012&r1=152011&r2=152012&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/LiteralSupport.cpp (original)
+++ cfe/trunk/lib/Lex/LiteralSupport.cpp Sun Mar 4 22:02:15 2012
@@ -731,7 +731,11 @@
}
-/// character-literal: [C++0x lex.ccon]
+/// user-defined-character-literal: [C++11 lex.ext]
+/// character-literal ud-suffix
+/// ud-suffix:
+/// identifier
+/// character-literal: [C++11 lex.ccon]
/// ' c-char-sequence '
/// u' c-char-sequence '
/// U' c-char-sequence '
@@ -744,7 +748,7 @@
/// backslash \, or new-line character
/// escape-sequence
/// universal-character-name
-/// escape-sequence: [C++0x lex.ccon]
+/// escape-sequence:
/// simple-escape-sequence
/// octal-escape-sequence
/// hexadecimal-escape-sequence
@@ -757,7 +761,7 @@
/// hexadecimal-escape-sequence:
/// \x hexadecimal-digit
/// hexadecimal-escape-sequence hexadecimal-digit
-/// universal-character-name:
+/// universal-character-name: [C++11 lex.charset]
/// \u hex-quad
/// \U hex-quad hex-quad
/// hex-quad:
@@ -780,8 +784,17 @@
assert(begin[0] == '\'' && "Invalid token lexed");
++begin;
+ // Remove an optional ud-suffix.
+ if (end[-1] != '\'') {
+ const char *UDSuffixEnd = end;
+ do {
+ --end;
+ } while (end[-1] != '\'');
+ UDSuffixBuf.assign(end, UDSuffixEnd);
+ }
+
// Trim the ending quote.
- assert(end[-1] == '\'' && "Invalid token lexed");
+ assert(end != begin && "Invalid token lexed");
--end;
// FIXME: The "Value" is an uint64_t so we can handle char literals of
@@ -1071,6 +1084,8 @@
Pascal = false;
+ SourceLocation UDSuffixTokLoc;
+
for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
const char *ThisTokBuf = &TokenBuf[0];
// Get the spelling of the token, which eliminates trigraphs, etc. We know
@@ -1085,7 +1100,39 @@
continue;
}
- const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
+ const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
+
+ // Remove an optional ud-suffix.
+ if (ThisTokEnd[-1] != '"') {
+ const char *UDSuffixEnd = ThisTokEnd;
+ do {
+ --ThisTokEnd;
+ } while (ThisTokEnd[-1] != '"');
+
+ StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
+
+ if (UDSuffixBuf.empty()) {
+ UDSuffixBuf.assign(UDSuffix);
+ UDSuffixTokLoc = StringToks[i].getLocation();
+ } else if (!UDSuffixBuf.equals(UDSuffix)) {
+ // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
+ // result of a concatenation involving at least one user-defined-string-
+ // literal, all the participating user-defined-string-literals shall
+ // have the same ud-suffix.
+ if (Diags) {
+ SourceLocation TokLoc = StringToks[i].getLocation();
+ Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
+ << UDSuffixBuf << UDSuffix
+ << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
+ << SourceRange(TokLoc, TokLoc);
+ }
+ hadError = true;
+ }
+ }
+
+ // Strip the end quote.
+ --ThisTokEnd;
+
// TODO: Input character set mapping support.
// Skip marker for wide or unicode strings.
Modified: cfe/trunk/test/CXX/lex/lex.literal/lex.ext/p1.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CXX/lex/lex.literal/lex.ext/p1.cpp?rev=152012&r1=152011&r2=152012&view=diff
==============================================================================
--- cfe/trunk/test/CXX/lex/lex.literal/lex.ext/p1.cpp (original)
+++ cfe/trunk/test/CXX/lex/lex.literal/lex.ext/p1.cpp Sun Mar 4 22:02:15 2012
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -fsyntax-only -std=c++11 -verify %s
-int * operator "" p31(long double); // expected-warning{{user-defined literal with suffix 'p31' is preempted by C99 hexfloat extension}}
-long double operator "" _p31(long double);
+void operator "" p31(long double); // expected-warning{{user-defined literal with suffix 'p31' is preempted by C99 hexfloat extension}}
+void operator "" _p31(long double);
long double operator "" pi(long double); // expected-warning{{user-defined literals not starting with '_' are reserved by the implementation}}
float hexfloat = 0x1p31; // allow hexfloats
Modified: cfe/trunk/test/CXX/over/over.oper/over.literal/p5.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CXX/over/over.oper/over.literal/p5.cpp?rev=152012&r1=152011&r2=152012&view=diff
==============================================================================
--- cfe/trunk/test/CXX/over/over.oper/over.literal/p5.cpp (original)
+++ cfe/trunk/test/CXX/over/over.oper/over.literal/p5.cpp Sun Mar 4 22:02:15 2012
@@ -7,9 +7,13 @@
template<char... C> S<C...> operator "" _a();
template<typename T> struct U {
+ friend int operator "" _a(const char *, size_t);
// FIXME: It's not entirely clear whether this is intended to be legal.
friend U operator "" _a(const T *, size_t); // expected-error {{parameter}}
};
+template<char...> struct V {
+ friend void operator "" _b(); // expected-error {{parameter}}
+};
template<char... C, int N = 0> void operator "" _b(); // expected-error {{parameter}}
template<char... C> void operator "" _b(int N = 0); // expected-error {{parameter}}
Modified: cfe/trunk/test/CXX/over/over.oper/over.literal/p8.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CXX/over/over.oper/over.literal/p8.cpp?rev=152012&r1=152011&r2=152012&view=diff
==============================================================================
--- cfe/trunk/test/CXX/over/over.oper/over.literal/p8.cpp (original)
+++ cfe/trunk/test/CXX/over/over.oper/over.literal/p8.cpp Sun Mar 4 22:02:15 2012
@@ -9,8 +9,8 @@
string operator "" _i18n(const char*, std::size_t); // ok
// FIXME: This should be accepted once we support UCNs
template<char...> int operator "" \u03C0(); // ok, UCN for lowercase pi // expected-error {{expected identifier}}
-// FIXME: This should be rejected once we lex user-defined literal suffices
-float operator ""E(const char *); // expected-warning {{hexfloat}}
+// FIXME: Accept this as an extension, with a fix-it to add the space
+float operator ""E(const char *); // expected-error {{must be '""'}} expected-error {{expected identifier}}
float operator " " B(const char *); // expected-error {{must be '""'}} expected-warning {{hexfloat}}
string operator "" 5X(const char *, std::size_t); // expected-error {{expected identifier}}
double operator "" _miles(double); // expected-error {{parameter}}
Modified: cfe/trunk/test/SemaCXX/cxx98-compat.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCXX/cxx98-compat.cpp?rev=152012&r1=152011&r2=152012&view=diff
==============================================================================
--- cfe/trunk/test/SemaCXX/cxx98-compat.cpp (original)
+++ cfe/trunk/test/SemaCXX/cxx98-compat.cpp Sun Mar 4 22:02:15 2012
@@ -50,7 +50,7 @@
return { 0 }; // expected-warning {{generalized initializer lists are incompatible with C++98}}
}
-int operator""_hello(const char *); // expected-warning {{literal operators are incompatible with C++98}}
+int operator"" _hello(const char *); // expected-warning {{literal operators are incompatible with C++98}}
enum EnumFixed : int { // expected-warning {{enumeration types with a fixed underlying type are incompatible with C++98}}
};
More information about the cfe-commits mailing list