[cfe-commits] r61360 - in /cfe/trunk: Driver/CacheTokens.cpp include/clang/Lex/PTHLexer.h lib/Lex/PPDirectives.cpp lib/Lex/PTHLexer.cpp
Ted Kremenek
kremenek at apple.com
Mon Dec 22 17:30:53 PST 2008
Author: kremenek
Date: Mon Dec 22 19:30:52 2008
New Revision: 61360
URL: http://llvm.org/viewvc/llvm-project?rev=61360&view=rev
Log:
PTH:
- Embed 'eom' tokens in PTH file.
- Use the embedded 'eom' tokens instead of lazily generating them in the PTHLexer.
This means that the PTHLexer can always advance to the next token immediately
after reading a token (instead of buffering the last-read token in a copy).
- Moved logic of 'ReadToken' into Lex. GetToken & ReadToken no longer exist.
- These changes result in a 3.3% speedup (-Eonly) on Cocoa.h.
- The code is a little gross. Many cleanups are possible and should be done.
Modified:
cfe/trunk/Driver/CacheTokens.cpp
cfe/trunk/include/clang/Lex/PTHLexer.h
cfe/trunk/lib/Lex/PPDirectives.cpp
cfe/trunk/lib/Lex/PTHLexer.cpp
Modified: cfe/trunk/Driver/CacheTokens.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/Driver/CacheTokens.cpp?rev=61360&r1=61359&r2=61360&view=diff
==============================================================================
--- cfe/trunk/Driver/CacheTokens.cpp (original)
+++ cfe/trunk/Driver/CacheTokens.cpp Mon Dec 22 19:30:52 2008
@@ -159,23 +159,43 @@
typedef std::vector<std::pair<Offset, unsigned> > PPCondTable;
PPCondTable PPCond;
std::vector<unsigned> PPStartCond;
+ bool ParsingPreprocessorDirective = false;
Token Tok;
do {
L.LexFromRawLexer(Tok);
+ if ((Tok.isAtStartOfLine() || Tok.is(tok::eof)) &&
+ ParsingPreprocessorDirective) {
+ // Insert an eom token into the token cache. It has the same
+ // position as the next token that is not on the same line as the
+ // preprocessor directive. Observe that we continue processing
+ // 'Tok' when we exit this branch.
+ Token Tmp = Tok;
+ Tmp.setKind(tok::eom);
+ Tmp.clearFlag(Token::StartOfLine);
+ Tmp.setIdentifierInfo(0);
+ EmitToken(Out, Tmp, SMgr, idcount, IM);
+ ParsingPreprocessorDirective = false;
+ }
+
if (Tok.is(tok::identifier)) {
Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
+ continue;
}
- else if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) {
+
+ if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) {
// Special processing for #include. Store the '#' token and lex
// the next token.
+ assert(!ParsingPreprocessorDirective);
Offset HashOff = (Offset) Out.tell();
EmitToken(Out, Tok, SMgr, idcount, IM);
// Get the next token.
L.LexFromRawLexer(Tok);
+
+ assert(!Tok.isAtStartOfLine());
// Did we see 'include'/'import'/'include_next'?
if (!Tok.is(tok::identifier))
@@ -185,27 +205,37 @@
Tok.setIdentifierInfo(II);
tok::PPKeywordKind K = II->getPPKeywordID();
- if (K == tok::pp_include || K == tok::pp_import ||
- K == tok::pp_include_next) {
-
+ assert(K != tok::pp_not_keyword);
+ ParsingPreprocessorDirective = true;
+
+ switch (K) {
+ default:
+ break;
+ case tok::pp_include:
+ case tok::pp_import:
+ case tok::pp_include_next: {
// Save the 'include' token.
EmitToken(Out, Tok, SMgr, idcount, IM);
-
// Lex the next token as an include string.
L.setParsingPreprocessorDirective(true);
L.LexIncludeFilename(Tok);
L.setParsingPreprocessorDirective(false);
-
+ assert(!Tok.isAtStartOfLine());
if (Tok.is(tok::identifier))
Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
+
+ break;
}
- else if (K == tok::pp_if || K == tok::pp_ifdef || K == tok::pp_ifndef) {
+ case tok::pp_if:
+ case tok::pp_ifdef:
+ case tok::pp_ifndef: {
// Ad an entry for '#if' and friends. We initially set the target index
// to 0. This will get backpatched when we hit #endif.
PPStartCond.push_back(PPCond.size());
PPCond.push_back(std::make_pair(HashOff, 0U));
+ break;
}
- else if (K == tok::pp_endif) {
+ case tok::pp_endif: {
// Add an entry for '#endif'. We set the target table index to itself.
// This will later be set to zero when emitting to the PTH file. We
// use 0 for uninitialized indices because that is easier to debug.
@@ -218,9 +248,11 @@
PPStartCond.pop_back();
// Add the new entry to PPCond.
PPCond.push_back(std::make_pair(HashOff, index));
+ break;
}
- else if (K == tok::pp_elif || K == tok::pp_else) {
- // Add an entry for '#elif' or '#else.
+ case tok::pp_elif:
+ case tok::pp_else: {
+ // Add an entry for #elif or #else.
// This serves as both a closing and opening of a conditional block.
// This means that its entry will get backpatched later.
unsigned index = PPCond.size();
@@ -233,6 +265,8 @@
// Now add '#elif' as a new block opening.
PPCond.push_back(std::make_pair(HashOff, 0U));
PPStartCond.push_back(index);
+ break;
+ }
}
}
}
Modified: cfe/trunk/include/clang/Lex/PTHLexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/PTHLexer.h?rev=61360&r1=61359&r2=61360&view=diff
==============================================================================
--- cfe/trunk/include/clang/Lex/PTHLexer.h (original)
+++ cfe/trunk/include/clang/Lex/PTHLexer.h Mon Dec 22 19:30:52 2008
@@ -51,9 +51,7 @@
/// PTHMgr - The PTHManager object that created this PTHLexer.
PTHManager& PTHMgr;
- Token LastFetched;
Token EofToken;
- bool NeedsFetching;
public:
@@ -95,20 +93,7 @@
/// SkipBlock - Used by Preprocessor to skip the current conditional block.
bool SkipBlock();
-private:
- /// AtLastToken - Returns true if the PTHLexer is at the last token.
- bool AtLastToken() {
- Token T = GetToken();
- return T.is(tok::eof) ? EofToken = T, true : false;
- }
-
- /// GetToken - Returns the next token. This method does not advance the
- /// PTHLexer to the next token.
- Token GetToken();
-
- /// AdvanceToken - Advances the PTHLexer to the next token.
- void AdvanceToken() { NeedsFetching = true; }
-
+private:
bool LexEndOfFile(Token &Result);
};
Modified: cfe/trunk/lib/Lex/PPDirectives.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PPDirectives.cpp?rev=61360&r1=61359&r2=61360&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/PPDirectives.cpp (original)
+++ cfe/trunk/lib/Lex/PPDirectives.cpp Mon Dec 22 19:30:52 2008
@@ -339,6 +339,13 @@
// If the #if block wasn't entered then enter the #else block now.
if (!CondInfo.FoundNonSkip) {
CondInfo.FoundNonSkip = true;
+
+ // Consume the eom token.
+ CurPTHLexer->ParsingPreprocessorDirective = true;
+ LexUnexpandedToken(Tok);
+ assert(Tok.is(tok::eom));
+ CurPTHLexer->ParsingPreprocessorDirective = false;
+
break;
}
Modified: cfe/trunk/lib/Lex/PTHLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PTHLexer.cpp?rev=61360&r1=61359&r2=61360&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/PTHLexer.cpp (original)
+++ cfe/trunk/lib/Lex/PTHLexer.cpp Mon Dec 22 19:30:52 2008
@@ -28,38 +28,64 @@
#define DISK_TOKEN_SIZE (2+3*4)
+//===----------------------------------------------------------------------===//
+// Utility methods for reading from the mmap'ed PTH file.
+//===----------------------------------------------------------------------===//
+
+static inline uint8_t Read8(const char*& data) {
+ return (uint8_t) *(data++);
+}
+
+static inline uint32_t Read32(const char*& data) {
+ uint32_t V = (uint32_t) Read8(data);
+ V |= (((uint32_t) Read8(data)) << 8);
+ V |= (((uint32_t) Read8(data)) << 16);
+ V |= (((uint32_t) Read8(data)) << 24);
+ return V;
+}
+
+//===----------------------------------------------------------------------===//
+// PTHLexer methods.
+//===----------------------------------------------------------------------===//
+
PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
const char* ppcond, PTHManager& PM)
: PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
- PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM), NeedsFetching(true) {
- // Make sure the EofToken is completely clean.
- EofToken.startToken();
- }
+ PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {}
-Token PTHLexer::GetToken() {
- // Read the next token, or if we haven't advanced yet, get the last
- // token read.
- if (NeedsFetching) {
- NeedsFetching = false;
- ReadToken(LastFetched);
- }
+void PTHLexer::Lex(Token& Tok) {
+LexNextToken:
+
+ // Read the token.
+ // FIXME: Setting the flags directly should obviate this step.
+ Tok.startToken();
- Token Tok = LastFetched;
+ // Shadow CurPtr into an automatic variable so that Read8 doesn't load and
+ // store back into the instance variable.
+ const char *CurPtrShadow = CurPtr;
- // If we are in raw mode, zero out identifier pointers. This is
- // needed for 'pragma poison'. Note that this requires that the Preprocessor
- // can go back to the original source when it calls getSpelling().
- if (LexingRawMode && Tok.is(tok::identifier))
- Tok.setIdentifierInfo(0);
+ // Read the type of the token.
+ Tok.setKind((tok::TokenKind) Read8(CurPtrShadow));
+
+ // Set flags. This is gross, since we are really setting multiple flags.
+ Tok.setFlag((Token::TokenFlags) Read8(CurPtrShadow));
+
+ // Set the IdentifierInfo* (if any).
+ Tok.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtrShadow));
+
+ // Set the SourceLocation. Since all tokens are constructed using a
+ // raw lexer, they will all be offseted from the same FileID.
+ Tok.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtrShadow)));
+
+ // Finally, read and set the length of the token.
+ Tok.setLength(Read32(CurPtrShadow));
- return Tok;
-}
+ CurPtr = CurPtrShadow;
-void PTHLexer::Lex(Token& Tok) {
-LexNextToken:
- Tok = GetToken();
-
- if (AtLastToken()) {
+ if (Tok.is(tok::eof)) {
+ // Save the end-of-file token.
+ EofToken = Tok;
+
Preprocessor *PPCache = PP;
if (LexEndOfFile(Tok))
@@ -68,19 +94,28 @@
assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
return PPCache->Lex(Tok);
}
+
+ MIOpt.ReadToken();
- // Don't advance to the next token yet. Check if we are at the
- // start of a new line and we're processing a directive. If so, we
- // consume this token twice, once as an tok::eom.
- if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) {
+ if (Tok.is(tok::eom)) {
ParsingPreprocessorDirective = false;
- Tok.setKind(tok::eom);
- MIOpt.ReadToken();
return;
}
- // Advance to the next token.
- AdvanceToken();
+#if 0
+ SourceManager& SM = PP->getSourceManager();
+ SourceLocation L = Tok.getLocation();
+
+ static const char* last = 0;
+ const char* next = SM.getContentCacheForLoc(L)->Entry->getName();
+ if (next != last) {
+ last = next;
+ llvm::cerr << next << '\n';
+ }
+
+ llvm::cerr << "line " << SM.getLogicalLineNumber(L) << " col " <<
+ SM.getLogicalColumnNumber(L) << '\n';
+#endif
if (Tok.is(tok::hash)) {
if (Tok.isAtStartOfLine()) {
@@ -95,33 +130,31 @@
}
}
}
-
- MIOpt.ReadToken();
if (Tok.is(tok::identifier)) {
- if (LexingRawMode) return;
+ if (LexingRawMode) {
+ Tok.setIdentifierInfo(0);
+ return;
+ }
+
return PP->HandleIdentifier(Tok);
- }
+ }
+
+
+ assert(!Tok.is(tok::eom) || ParsingPreprocessorDirective);
}
+// FIXME: This method can just be inlined into Lex().
bool PTHLexer::LexEndOfFile(Token &Tok) {
-
- if (ParsingPreprocessorDirective) {
- ParsingPreprocessorDirective = false;
- Tok.setKind(tok::eom);
- MIOpt.ReadToken();
- return true; // Have a token.
- }
-
- if (LexingRawMode) {
- MIOpt.ReadToken();
- return true; // Have an eof token.
- }
+ assert(!ParsingPreprocessorDirective);
+ assert(!LexingRawMode);
// FIXME: Issue diagnostics similar to Lexer.
return PP->HandleEndOfFile(Tok, false);
}
+// FIXME: We can just grab the last token instead of storing a copy
+// into EofToken.
void PTHLexer::setEOF(Token& Tok) {
assert(!EofToken.is(tok::eof));
Tok = EofToken;
@@ -131,6 +164,10 @@
assert(ParsingPreprocessorDirective && ParsingFilename == false &&
"Must be in a preprocessing directive!");
+ // We assume that if the preprocessor wishes to discard to the end of
+ // the line that it also means to end the current preprocessor directive.
+ ParsingPreprocessorDirective = false;
+
// Skip tokens by only peeking at their token kind and the flags.
// We don't need to actually reconstruct full tokens from the token buffer.
// This saves some copies and it also reduces IdentifierInfo* lookup.
@@ -139,7 +176,7 @@
// Read the token kind. Are we at the end of the file?
tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
if (x == tok::eof) break;
-
+
// Read the token flags. Are we at the start of the next line?
Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
if (y & Token::StartOfLine) break;
@@ -151,22 +188,6 @@
CurPtr = p;
}
-//===----------------------------------------------------------------------===//
-// Utility methods for reading from the mmap'ed PTH file.
-//===----------------------------------------------------------------------===//
-
-static inline uint8_t Read8(const char*& data) {
- return (uint8_t) *(data++);
-}
-
-static inline uint32_t Read32(const char*& data) {
- uint32_t V = (uint32_t) Read8(data);
- V |= (((uint32_t) Read8(data)) << 8);
- V |= (((uint32_t) Read8(data)) << 16);
- V |= (((uint32_t) Read8(data)) << 24);
- return V;
-}
-
/// SkipBlock - Used by Preprocessor to skip the current conditional block.
bool PTHLexer::SkipBlock() {
assert(CurPPCondPtr && "No cached PP conditional information.");
@@ -225,7 +246,6 @@
// By construction NextIdx will be zero if this is a #endif. This is useful
// to know to obviate lexing another token.
bool isEndif = NextIdx == 0;
- NeedsFetching = true;
// This case can occur when we see something like this:
//
@@ -240,7 +260,7 @@
assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE);
// Did we reach a #endif? If so, go ahead and consume that token as well.
if (isEndif)
- CurPtr += DISK_TOKEN_SIZE;
+ CurPtr += DISK_TOKEN_SIZE*2;
else
LastHashTokPtr = HashEntryI;
@@ -253,20 +273,13 @@
// Update the location of the last observed '#'. This is useful if we
// are skipping multiple blocks.
LastHashTokPtr = CurPtr;
-
-#ifndef DEBUG
- // In a debug build we should verify that the token is really a '#' that
- // appears at the start of the line.
- Token Tok;
- ReadToken(Tok);
- assert(Tok.isAtStartOfLine() && Tok.is(tok::hash));
-#else
- // In a full release build we can just skip the token entirely.
- CurPtr += DISK_TOKEN_SIZE;
-#endif
+ // Skip the '#' token.
+ assert(((tok::TokenKind) (unsigned char) *CurPtr) == tok::hash);
+ CurPtr += DISK_TOKEN_SIZE;
+
// Did we reach a #endif? If so, go ahead and consume that token as well.
- if (isEndif) { CurPtr += DISK_TOKEN_SIZE; }
+ if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; }
return isEndif;
}
@@ -287,38 +300,6 @@
}
//===----------------------------------------------------------------------===//
-// Token reconstruction from the PTH file.
-//===----------------------------------------------------------------------===//
-
-void PTHLexer::ReadToken(Token& T) {
- // Clear the token.
- // FIXME: Setting the flags directly should obviate this step.
- T.startToken();
-
- // Shadow CurPtr into an automatic variable so that Read8 doesn't load and
- // store back into the instance variable.
- const char *CurPtrShadow = CurPtr;
-
- // Read the type of the token.
- T.setKind((tok::TokenKind) Read8(CurPtrShadow));
-
- // Set flags. This is gross, since we are really setting multiple flags.
- T.setFlag((Token::TokenFlags) Read8(CurPtrShadow));
-
- // Set the IdentifierInfo* (if any).
- T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtrShadow));
-
- // Set the SourceLocation. Since all tokens are constructed using a
- // raw lexer, they will all be offseted from the same FileID.
- T.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtrShadow)));
-
- // Finally, read and set the length of the token.
- T.setLength(Read32(CurPtrShadow));
-
- CurPtr = CurPtrShadow;
-}
-
-//===----------------------------------------------------------------------===//
// Internal Data Structures for PTH file lookup and resolving identifiers.
//===----------------------------------------------------------------------===//
More information about the cfe-commits
mailing list