[cfe-commits] r168776 - in /cfe/trunk: lib/Lex/Lexer.cpp test/CXX/lex/lex.literal/lex.ext/p5.cpp test/CodeGen/string-literal.c
Richard Smith
richard-llvm at metafoo.co.uk
Tue Nov 27 23:29:00 PST 2012
Author: rsmith
Date: Wed Nov 28 01:29:00 2012
New Revision: 168776
URL: http://llvm.org/viewvc/llvm-project?rev=168776&view=rev
Log:
Teach Lexer::getSpelling about raw string literals. Specifically, if a raw
string literal needs cleaning (because it contains line-splicing in the
encoding prefix or in the ud-suffix), do not clean the section between the
double-quotes -- that's the "raw" bit!
Modified:
cfe/trunk/lib/Lex/Lexer.cpp
cfe/trunk/test/CXX/lex/lex.literal/lex.ext/p5.cpp
cfe/trunk/test/CodeGen/string-literal.c
Modified: cfe/trunk/lib/Lex/Lexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Lexer.cpp?rev=168776&r1=168775&r2=168776&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/Lexer.cpp (original)
+++ cfe/trunk/lib/Lex/Lexer.cpp Wed Nov 28 01:29:00 2012
@@ -233,16 +233,67 @@
// Token Spelling
//===----------------------------------------------------------------------===//
+/// \brief Slow case of getSpelling. Extract the characters comprising the
+/// spelling of this token from the provided input buffer.
+static size_t getSpellingSlow(const Token &Tok, const char *BufPtr,
+ const LangOptions &LangOpts, char *Spelling) {
+ assert(Tok.needsCleaning() && "getSpellingSlow called on simple token");
+
+ size_t Length = 0;
+ const char *BufEnd = BufPtr + Tok.getLength();
+
+ if (Tok.is(tok::string_literal)) {
+ // Munch the encoding-prefix and opening double-quote.
+ while (BufPtr < BufEnd) {
+ unsigned Size;
+ Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+ BufPtr += Size;
+
+ if (Spelling[Length - 1] == '"')
+ break;
+ }
+
+ // Raw string literals need special handling; trigraph expansion and line
+ // splicing do not occur within their d-char-sequence nor within their
+ // r-char-sequence.
+ if (Length >= 2 &&
+ Spelling[Length - 2] == 'R' && Spelling[Length - 1] == '"') {
+ // Search backwards from the end of the token to find the matching closing
+ // quote.
+ const char *RawEnd = BufEnd;
+ do --RawEnd; while (*RawEnd != '"');
+ size_t RawLength = RawEnd - BufPtr + 1;
+
+ // Everything between the quotes is included verbatim in the spelling.
+ memcpy(Spelling + Length, BufPtr, RawLength);
+ Length += RawLength;
+ BufPtr += RawLength;
+
+ // The rest of the token is lexed normally.
+ }
+ }
+
+ while (BufPtr < BufEnd) {
+ unsigned Size;
+ Spelling[Length++] = Lexer::getCharAndSizeNoWarn(BufPtr, Size, LangOpts);
+ BufPtr += Size;
+ }
+
+ assert(Length < Tok.getLength() &&
+ "NeedsCleaning flag set on token that didn't need cleaning!");
+ return Length;
+}
+
/// getSpelling() - Return the 'spelling' of this token. The spelling of a
/// token are the characters used to represent the token in the source file
/// after trigraph expansion and escaped-newline folding. In particular, this
/// wants to get the true, uncanonicalized, spelling of things like digraphs
/// UCNs, etc.
StringRef Lexer::getSpelling(SourceLocation loc,
- SmallVectorImpl<char> &buffer,
- const SourceManager &SM,
- const LangOptions &options,
- bool *invalid) {
+ SmallVectorImpl<char> &buffer,
+ const SourceManager &SM,
+ const LangOptions &options,
+ bool *invalid) {
// Break down the source location.
std::pair<FileID, unsigned> locInfo = SM.getDecomposedLoc(loc);
@@ -267,17 +318,10 @@
// Common case: no need for cleaning.
if (!token.needsCleaning())
return StringRef(tokenBegin, length);
-
- // Hard case, we need to relex the characters into the string.
- buffer.clear();
- buffer.reserve(length);
-
- for (const char *ti = tokenBegin, *te = ti + length; ti != te; ) {
- unsigned charSize;
- buffer.push_back(Lexer::getCharAndSizeNoWarn(ti, charSize, options));
- ti += charSize;
- }
+ // Hard case, we need to relex the characters into the string.
+ buffer.resize(length);
+ buffer.resize(getSpellingSlow(token, tokenBegin, options, buffer.data()));
return StringRef(buffer.data(), buffer.size());
}
@@ -289,31 +333,22 @@
std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
const LangOptions &LangOpts, bool *Invalid) {
assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
-
- // If this token contains nothing interesting, return it directly.
+
bool CharDataInvalid = false;
- const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
+ const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation(),
&CharDataInvalid);
if (Invalid)
*Invalid = CharDataInvalid;
if (CharDataInvalid)
return std::string();
-
+
+ // If this token contains nothing interesting, return it directly.
if (!Tok.needsCleaning())
- return std::string(TokStart, TokStart+Tok.getLength());
-
+ return std::string(TokStart, TokStart + Tok.getLength());
+
std::string Result;
- Result.reserve(Tok.getLength());
-
- // Otherwise, hard case, relex the characters into the string.
- for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
- Ptr != End; ) {
- unsigned CharSize;
- Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts));
- Ptr += CharSize;
- }
- assert(Result.size() != unsigned(Tok.getLength()) &&
- "NeedsCleaning flag set on something that didn't need cleaning!");
+ Result.resize(Tok.getLength());
+ Result.resize(getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin()));
return Result;
}
@@ -365,17 +400,7 @@
}
// Otherwise, hard case, relex the characters into the string.
- char *OutBuf = const_cast<char*>(Buffer);
- for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
- Ptr != End; ) {
- unsigned CharSize;
- *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, LangOpts);
- Ptr += CharSize;
- }
- assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
- "NeedsCleaning flag set on something that didn't need cleaning!");
-
- return OutBuf-Buffer;
+ return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
}
Modified: cfe/trunk/test/CXX/lex/lex.literal/lex.ext/p5.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CXX/lex/lex.literal/lex.ext/p5.cpp?rev=168776&r1=168775&r2=168776&view=diff
==============================================================================
--- cfe/trunk/test/CXX/lex/lex.literal/lex.ext/p5.cpp (original)
+++ cfe/trunk/test/CXX/lex/lex.literal/lex.ext/p5.cpp Wed Nov 28 01:29:00 2012
@@ -11,3 +11,10 @@
char &operator "" _x1(const wchar_t *, size_t);
char &i4 = L"foo"_x1; // ok
double &i5 = R"(foo)"_x1; // ok
+double &i6 = u\
+8\
+R\
+"(foo)"\
+_\
+x\
+1; // ok
Modified: cfe/trunk/test/CodeGen/string-literal.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/string-literal.c?rev=168776&r1=168775&r2=168776&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/string-literal.c (original)
+++ cfe/trunk/test/CodeGen/string-literal.c Wed Nov 28 01:29:00 2012
@@ -76,5 +76,12 @@
const char *q = R"(abc
def)" "ghi";
+ // CHECK-CPP0X: private unnamed_addr constant [13 x i8] c"abc\5C\0A??=\0Adef\00", align 1
+ const char *r = R\
+"(abc\
+??=
+def)";
+
+
#endif
}
More information about the cfe-commits mailing list