[cfe-commits] r160680 - in /cfe/trunk: include/clang/AST/CommentParser.h lib/AST/CommentParser.cpp
Dmitri Gribenko
gribozavr at gmail.com
Tue Jul 24 10:52:18 PDT 2012
Author: gribozavr
Date: Tue Jul 24 12:52:18 2012
New Revision: 160680
URL: http://llvm.org/viewvc/llvm-project?rev=160680&view=rev
Log:
Comment parsing: couple TextTokenRetokenizer and the comment parser together to
remove one of the two variable-length lookahead buffers. The retokenizer now
asks for more tokens when it needs them.
Modified:
cfe/trunk/include/clang/AST/CommentParser.h
cfe/trunk/lib/AST/CommentParser.cpp
Modified: cfe/trunk/include/clang/AST/CommentParser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/CommentParser.h?rev=160680&r1=160679&r2=160680&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/CommentParser.h (original)
+++ cfe/trunk/include/clang/AST/CommentParser.h Tue Jul 24 12:52:18 2012
@@ -27,6 +27,8 @@
/// Doxygen comment parser.
class Parser {
+ friend class TextTokenRetokenizer;
+
Lexer &L;
Sema &S;
Modified: cfe/trunk/lib/AST/CommentParser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/CommentParser.cpp?rev=160680&r1=160679&r2=160680&view=diff
==============================================================================
--- cfe/trunk/lib/AST/CommentParser.cpp (original)
+++ cfe/trunk/lib/AST/CommentParser.cpp Tue Jul 24 12:52:18 2012
@@ -19,8 +19,8 @@
/// Re-lexes a sequence of tok::text tokens.
class TextTokenRetokenizer {
llvm::BumpPtrAllocator &Allocator;
- static const unsigned MaxTokens = 16;
- SmallVector<Token, MaxTokens> Toks;
+ Parser &P;
+ SmallVector<Token, 16> Toks;
struct Position {
unsigned CurToken;
@@ -39,7 +39,7 @@
/// Sets up the buffer pointers to point to current token.
void setupBuffer() {
- assert(Pos.CurToken < Toks.size());
+ assert(!isEnd());
const Token &Tok = Toks[Pos.CurToken];
Pos.BufferStart = Tok.getText().begin();
@@ -65,11 +65,27 @@
Pos.BufferPtr++;
if (Pos.BufferPtr == Pos.BufferEnd) {
Pos.CurToken++;
- if (Pos.CurToken < Toks.size())
+ if (isEnd() && addToken()) {
+ assert(!isEnd());
setupBuffer();
+ }
}
}
+ /// Add a token.
+ /// Returns true on success, false if there are no interesting tokens to
+ /// fetch from lexer.
+ bool addToken() {
+ if (P.Tok.isNot(tok::text))
+ return false;
+
+ Toks.push_back(P.Tok);
+ P.consumeToken();
+ if (Toks.size() == 1)
+ setupBuffer();
+ return true;
+ }
+
static bool isWhitespace(char C) {
return C == ' ' || C == '\n' || C == '\r' ||
C == '\t' || C == '\f' || C == '\v';
@@ -100,22 +116,10 @@
}
public:
- TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator):
- Allocator(Allocator) {
+ TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
+ Allocator(Allocator), P(P) {
Pos.CurToken = 0;
- }
-
- /// Add a token.
- /// Returns true on success, false if it seems like we have enough tokens.
- bool addToken(const Token &Tok) {
- assert(Tok.is(tok::text));
- if (Toks.size() >= MaxTokens)
- return false;
-
- Toks.push_back(Tok);
- if (Toks.size() == 1)
- setupBuffer();
- return true;
+ addToken();
}
/// Extract a word -- sequence of non-whitespace characters.
@@ -199,23 +203,27 @@
return true;
}
- /// Return a text token. Useful to take tokens back.
- bool lexText(Token &Tok) {
+ /// Put back tokens that we didn't consume.
+ void putBackLeftoverTokens() {
if (isEnd())
- return false;
+ return;
- if (Pos.BufferPtr != Pos.BufferStart)
- formTokenWithChars(Tok, getSourceLocation(),
+ bool HavePartialTok = false;
+ Token PartialTok;
+ if (Pos.BufferPtr != Pos.BufferStart) {
+ formTokenWithChars(PartialTok, getSourceLocation(),
Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
StringRef(Pos.BufferPtr,
Pos.BufferEnd - Pos.BufferPtr));
- else
- Tok = Toks[Pos.CurToken];
+ HavePartialTok = true;
+ Pos.CurToken++;
+ }
- Pos.CurToken++;
- if (Pos.CurToken < Toks.size())
- setupBuffer();
- return true;
+ P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
+ Pos.CurToken = Toks.size();
+
+ if (HavePartialTok)
+ P.putBack(PartialTok);
}
};
@@ -296,24 +304,14 @@
if (IsParam || NumArgs > 0) {
// In order to parse command arguments we need to retokenize a few
// following text tokens.
- TextTokenRetokenizer Retokenizer(Allocator);
- while (Tok.is(tok::text)) {
- if (Retokenizer.addToken(Tok))
- consumeToken();
- }
+ TextTokenRetokenizer Retokenizer(Allocator, *this);
if (IsParam)
PC = parseParamCommandArgs(PC, Retokenizer);
else
BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);
- // Put back tokens we didn't use.
- SmallVector<Token, 16> TextToks;
- Token Text;
- while (Retokenizer.lexText(Text)) {
- TextToks.push_back(Text);
- }
- putBack(TextToks);
+ Retokenizer.putBackLeftoverTokens();
}
BlockContentComment *Block = parseParagraphOrBlockCommand();
@@ -331,11 +329,7 @@
const Token CommandTok = Tok;
consumeToken();
- TextTokenRetokenizer Retokenizer(Allocator);
- while (Tok.is(tok::text)) {
- if (Retokenizer.addToken(Tok))
- consumeToken();
- }
+ TextTokenRetokenizer Retokenizer(Allocator, *this);
Token ArgTok;
bool ArgTokValid = Retokenizer.lexWord(ArgTok);
@@ -354,9 +348,7 @@
CommandTok.getCommandName());
}
- Token Text;
- while (Retokenizer.lexText(Text))
- putBack(Text);
+ Retokenizer.putBackLeftoverTokens();
return IC;
}
More information about the cfe-commits
mailing list