[cfe-commits] r54611 - in /cfe/trunk: include/clang/Lex/Preprocessor.h include/clang/Parse/Parser.h lib/Lex/PPCaching.cpp lib/Lex/PPLexerChange.cpp lib/Lex/Preprocessor.cpp
Argiris Kirtzidis
akyrtzi at gmail.com
Sun Aug 10 06:15:28 PDT 2008
Author: akirtzidis
Date: Sun Aug 10 08:15:22 2008
New Revision: 54611
URL: http://llvm.org/viewvc/llvm-project?rev=54611&view=rev
Log:
Allow the preprocessor to cache the lexed tokens, so that we can do efficient lookahead and backtracking.
1) New public methods added:
-EnableBacktrackAtThisPos
-DisableBacktrack
-Backtrack
-isBacktrackEnabled
2) LookAhead() implementation is replaced with a more efficient one.
3) LookNext() is removed.
Added:
cfe/trunk/lib/Lex/PPCaching.cpp
Modified:
cfe/trunk/include/clang/Lex/Preprocessor.h
cfe/trunk/include/clang/Parse/Parser.h
cfe/trunk/lib/Lex/PPLexerChange.cpp
cfe/trunk/lib/Lex/Preprocessor.cpp
Modified: cfe/trunk/include/clang/Lex/Preprocessor.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/Preprocessor.h?rev=54611&r1=54610&r2=54611&view=diff
==============================================================================
--- cfe/trunk/include/clang/Lex/Preprocessor.h (original)
+++ cfe/trunk/include/clang/Lex/Preprocessor.h Sun Aug 10 08:15:22 2008
@@ -71,6 +71,9 @@
bool DisableMacroExpansion : 1; // True if macro expansion is disabled.
bool InMacroArgs : 1; // True if parsing fn macro invocation args.
+ /// CacheTokens - True when the lexed tokens are cached for backtracking.
+ bool CacheTokens : 1;
+
/// Identifiers - This is mapping/lookup information for all identifiers in
/// the program, including program keywords.
IdentifierTable Identifiers;
@@ -139,10 +142,24 @@
unsigned NumCachedTokenLexers;
TokenLexer *TokenLexerCache[TokenLexerCacheSize];
- /// PeekedToken - Cache the token that was retrieved through LookNext().
- /// This is a valid token (its Location is valid) when LookNext() is
- /// called and gets invalid again when it is "consumed" by Lex().
- Token PeekedToken;
+ // Cached tokens state.
+
+ typedef std::vector<Token> CachedTokensTy;
+
+ /// CachedTokens - Cached tokens are stored here when we do backtracking or
+ /// lookahead. They are "lexed" by the CachingLex() method.
+ CachedTokensTy CachedTokens;
+
+ /// CachedLexPos - The position of the cached token that CachingLex() should
+ /// "lex" next. If it points beyond the CachedTokens vector, it means that
+ /// a normal Lex() should be invoked.
+ CachedTokensTy::size_type CachedLexPos;
+
+ /// CachedBacktrackPos - Gets set by the EnableBacktrackAtThisPos() method,
+ /// to indicate the position where CachedLexPos should be set when the
+ /// Backtrack() method is invoked.
+ CachedTokensTy::size_type CachedBacktrackPos;
+
public:
Preprocessor(Diagnostic &diags, const LangOptions &opts, TargetInfo &target,
SourceManager &SM, HeaderSearch &Headers);
@@ -258,7 +275,45 @@
/// lexer stack. This should only be used in situations where the current
/// state of the top-of-stack lexer is known.
void RemoveTopOfLexerStack();
-
+
+ /// EnableBacktrackAtThisPos - From the point that this method is called, and
+ /// until DisableBacktrack() or Backtrack() is called, the Preprocessor keeps
+ /// track of the lexed tokens so that a subsequent Backtrack() call will make
+ /// the Preprocessor re-lex the same tokens.
+ ///
+ /// EnableBacktrackAtThisPos should not be called again until DisableBacktrack
+ /// or Backtrack is called.
+ ///
+ /// NOTE: *DO NOT* forget to call either DisableBacktrack() or Backtrack() at
+ /// some point after EnableBacktrackAtThisPos. If you don't, caching of tokens
+ /// will continue indefinitely.
+ ///
+ void EnableBacktrackAtThisPos() {
+ assert(!CacheTokens && "Backtrack is already enabled!");
+ CacheTokens = true;
+ CachedBacktrackPos = CachedLexPos;
+ EnterCachingLexMode();
+ }
+
+ /// DisableBacktrack - Stop the caching of tokens that was enabled by
+ /// EnableBacktrackAtThisPos().
+ void DisableBacktrack() {
+ assert(CacheTokens && "Backtrack is not enabled!");
+ CacheTokens = false;
+ }
+
+ /// Backtrack - Make Preprocessor re-lex the tokens that were lexed since
+ /// EnableBacktrackAtThisPos() was previously called.
+ void Backtrack() {
+ assert(CacheTokens && "Backtrack is not enabled!");
+ CacheTokens = false;
+ CachedLexPos = CachedBacktrackPos;
+ }
+
+ /// isBacktrackEnabled - True if EnableBacktrackAtThisPos() was called and
+ /// caching of tokens is on.
+ bool isBacktrackEnabled() const { return CacheTokens; }
+
/// Lex - To lex a token from the preprocessor, just pull a token from the
/// current lexer or macro object.
void Lex(Token &Result) {
@@ -266,11 +321,8 @@
CurLexer->Lex(Result);
else if (CurTokenLexer)
CurTokenLexer->Lex(Result);
- else {
- // We have a peeked token that hasn't been consumed yet.
- Result = PeekedToken;
- ConsumedPeekedToken();
- }
+ else
+ CachingLex(Result);
}
/// LexNonComment - Lex a token. If it's a comment, keep lexing until we get
@@ -300,32 +352,12 @@
/// returned by Lex(), LookAhead(1) returns the token after it, etc. This
/// returns normal tokens after phase 5. As such, it is equivalent to using
/// 'Lex', not 'LexUnexpandedToken'.
- ///
- /// NOTE: is a relatively expensive method, so it should not be used in common
- /// code paths if possible!
- ///
- Token LookAhead(unsigned N);
-
- /// LookNext - Returns the next token that would be returned by Lex() without
- /// consuming it.
- const Token &LookNext() {
- if (PeekedToken.getLocation().isInvalid()) {
- // We don't have a peeked token that hasn't been consumed yet.
- // Peek it now.
- PeekToken();
- }
- return PeekedToken;
+ const Token &LookAhead(unsigned N) {
+ if (CachedLexPos + N < CachedTokens.size())
+ return CachedTokens[CachedLexPos+N];
+ else
+ return PeekAhead(N+1);
}
-
-private:
- /// PeekToken - Lexes one token into PeekedToken and pushes CurLexer,
- /// CurLexerToken into the IncludeMacroStack before setting them to null.
- void PeekToken();
-
- /// ConsumedPeekedToken - Called when Lex() is about to return the PeekedToken
- /// and have it "consumed".
- void ConsumedPeekedToken();
-public:
/// Diag - Forwarding function for diagnostics. This emits a diagnostic at
/// the specified Token's location, translating the token's start
@@ -524,6 +556,17 @@
const DirectoryLookup *&CurDir);
//===--------------------------------------------------------------------===//
+ // Caching stuff.
+ void CachingLex(Token &Result);
+ bool InCachingLexMode() const { return CurLexer == 0 && CurTokenLexer == 0; }
+ void EnterCachingLexMode();
+ void ExitCachingLexMode() {
+ if (InCachingLexMode())
+ RemoveTopOfLexerStack();
+ }
+ const Token &PeekAhead(unsigned N);
+
+ //===--------------------------------------------------------------------===//
/// Handle*Directive - implement the various preprocessor directives. These
/// should side-effect the current preprocessor object so that the next call
/// to Lex() will return the appropriate token next.
Modified: cfe/trunk/include/clang/Parse/Parser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Parse/Parser.h?rev=54611&r1=54610&r2=54611&view=diff
==============================================================================
--- cfe/trunk/include/clang/Parse/Parser.h (original)
+++ cfe/trunk/include/clang/Parse/Parser.h Sun Aug 10 08:15:22 2008
@@ -198,10 +198,7 @@
/// Note that this differs from the Preprocessor's LookAhead method, because
/// the Parser always has one token lexed that the preprocessor doesn't.
///
- /// NOTE: is a relatively expensive method, so it should not be used in common
- /// code paths if possible!
- ///
- Token GetLookAheadToken(unsigned N) {
+ const Token &GetLookAheadToken(unsigned N) {
if (N == 0 || Tok.is(tok::eof)) return Tok;
return PP.LookAhead(N-1);
}
@@ -209,7 +206,7 @@
/// NextToken - This peeks ahead one token and returns it without
/// consuming it.
const Token &NextToken() {
- return PP.LookNext();
+ return PP.LookAhead(0);
}
Added: cfe/trunk/lib/Lex/PPCaching.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PPCaching.cpp?rev=54611&view=auto
==============================================================================
--- cfe/trunk/lib/Lex/PPCaching.cpp (added)
+++ cfe/trunk/lib/Lex/PPCaching.cpp Sun Aug 10 08:15:22 2008
@@ -0,0 +1,63 @@
+//===--- PPCaching.cpp - Handle caching lexed tokens ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements pieces of the Preprocessor interface that manage the
+// caching of lexed tokens.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/Preprocessor.h"
+using namespace clang;
+
+void Preprocessor::CachingLex(Token &Result) {
+ if (CachedLexPos < CachedTokens.size()) {
+ Result = CachedTokens[CachedLexPos++];
+ return;
+ }
+
+ ExitCachingLexMode();
+ Lex(Result);
+
+ if (!CacheTokens) {
+ // All cached tokens were consumed.
+ CachedTokens.clear();
+ CachedLexPos = 0;
+ return;
+ }
+
+ // We should cache the lexed token.
+
+ EnterCachingLexMode();
+ if (Result.isNot(tok::eof)) {
+ CachedTokens.push_back(Result);
+ ++CachedLexPos;
+ }
+}
+
+void Preprocessor::EnterCachingLexMode() {
+ if (InCachingLexMode())
+ return;
+
+ IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
+ CurTokenLexer));
+ CurLexer = 0;
+ CurTokenLexer = 0;
+}
+
+
+const Token &Preprocessor::PeekAhead(unsigned N) {
+ assert(CachedLexPos + N > CachedTokens.size() && "Confused caching.");
+ ExitCachingLexMode();
+ for (unsigned C = CachedLexPos + N - CachedTokens.size(); C > 0; --C) {
+ CachedTokens.push_back(Token());
+ Lex(CachedTokens.back());
+ }
+ EnterCachingLexMode();
+ return CachedTokens.back();
+}
Modified: cfe/trunk/lib/Lex/PPLexerChange.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/PPLexerChange.cpp?rev=54611&r1=54610&r2=54611&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/PPLexerChange.cpp (original)
+++ cfe/trunk/lib/Lex/PPLexerChange.cpp Sun Aug 10 08:15:22 2008
@@ -60,94 +60,6 @@
return 0;
}
-/// LookAhead - This peeks ahead N tokens and returns that token without
-/// consuming any tokens. LookAhead(0) returns 'Tok', LookAhead(1) returns
-/// the token after Tok, etc.
-///
-/// NOTE: is a relatively expensive method, so it should not be used in common
-/// code paths if possible!
-///
-Token Preprocessor::LookAhead(unsigned N) {
- // FIXME: Optimize the case where multiple lookahead calls are used back to
- // back. Consider if the the parser contained (dynamically):
- // Lookahead(1); Lookahead(1); Lookahead(1)
- // This would return the same token 3 times, but would end up making lots of
- // token stream lexers to do it. To handle this common case, see if the top
- // of the lexer stack is a TokenStreamLexer with macro expansion disabled. If
- // so, see if it has 'N' tokens available in it. If so, just return the
- // token.
-
- // FIXME: Optimize the case when the parser does multiple nearby lookahead
- // calls. For example, consider:
- // Lookahead(0); Lookahead(1); Lookahead(2);
- // The previous optimization won't apply, and there won't be any space left in
- // the array that was previously new'd. To handle this, always round up the
- // size we new to a multiple of 16 tokens. If the previous buffer has space
- // left, we can just grow it. This means we only have to do the new 1/16th as
- // often.
-
- // Optimized LookAhead(0) case.
- if (N == 0)
- return LookNext();
-
- Token *LookaheadTokens = new Token[N+1];
-
- // Read N+1 tokens into LookaheadTokens. After this loop, Tok is the token
- // to return.
- Token Tok;
- unsigned NumTokens = 0;
- for (; N != ~0U; --N, ++NumTokens) {
- Lex(Tok);
- LookaheadTokens[NumTokens] = Tok;
-
- // If we got to EOF, don't lex past it. This will cause LookAhead to return
- // the EOF token.
- if (Tok.is(tok::eof))
- break;
- }
-
- // Okay, at this point, we have the token we want to return in Tok. However,
- // we read it and a bunch of other stuff (in LookaheadTokens) that we must
- // allow subsequent calls to 'Lex' to return. To do this, we push a new token
- // lexer onto the lexer stack with the tokens we read here. This passes
- // ownership of LookaheadTokens to EnterTokenStream.
- //
- // Note that we disable macro expansion of the tokens from this buffer, since
- // any macros have already been expanded, and the internal preprocessor state
- // may already read past new macros. Consider something like LookAhead(1) on
- // X
- // #define X 14
- // Y
- // The lookahead call should return 'Y', and the next Lex call should return
- // 'X' even though X -> 14 has already been entered as a macro.
- //
- EnterTokenStream(LookaheadTokens, NumTokens, true /*DisableExpansion*/,
- true /*OwnsTokens*/);
- return Tok;
-}
-
-/// PeekToken - Lexes one token into PeekedToken and pushes CurLexer,
-/// CurLexerToken into the IncludeMacroStack before setting them to null.
-void Preprocessor::PeekToken() {
- Lex(PeekedToken);
- // Cache the current Lexer, TokenLexer and set them both to null.
- // When Lex() is called, PeekedToken will be "consumed".
- IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
- CurTokenLexer));
- CurLexer = 0;
- CurTokenLexer = 0;
-}
-
-/// ConsumedPeekedToken - Called when Lex() is about to return the PeekedToken
-/// and have it "consumed".
-void Preprocessor::ConsumedPeekedToken() {
- assert(PeekedToken.getLocation().isValid() && "Confused Peeking?");
- // Restore CurLexer, TokenLexer.
- RemoveTopOfLexerStack();
- // Make PeekedToken invalid.
- PeekedToken.startToken();
-}
-
//===----------------------------------------------------------------------===//
// Methods for Entering and Callbacks for leaving various contexts
Modified: cfe/trunk/lib/Lex/Preprocessor.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Preprocessor.cpp?rev=54611&r1=54610&r2=54611&view=diff
==============================================================================
--- cfe/trunk/lib/Lex/Preprocessor.cpp (original)
+++ cfe/trunk/lib/Lex/Preprocessor.cpp Sun Aug 10 08:15:22 2008
@@ -68,6 +68,9 @@
InMacroArgs = false;
NumCachedTokenLexers = 0;
+ CacheTokens = false;
+ CachedLexPos = 0;
+
// "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
// This gets unpoisoned where it is allowed.
(Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
@@ -579,4 +582,3 @@
if (II.isExtensionToken() && Features.C99)
Diag(Identifier, diag::ext_token_used);
}
-
More information about the cfe-commits mailing list