[cfe-commits] r160680 - in /cfe/trunk: include/clang/AST/CommentParser.h lib/AST/CommentParser.cpp

Dmitri Gribenko gribozavr at gmail.com
Tue Jul 24 10:52:18 PDT 2012


Author: gribozavr
Date: Tue Jul 24 12:52:18 2012
New Revision: 160680

URL: http://llvm.org/viewvc/llvm-project?rev=160680&view=rev
Log:
Comment parsing: couple TextTokenRetokenizer and the comment parser together to
remove one of the two variable-length lookahead buffers.  The retokenizer now
asks the parser for more tokens when it needs them.

Modified:
    cfe/trunk/include/clang/AST/CommentParser.h
    cfe/trunk/lib/AST/CommentParser.cpp

Modified: cfe/trunk/include/clang/AST/CommentParser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/CommentParser.h?rev=160680&r1=160679&r2=160680&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/CommentParser.h (original)
+++ cfe/trunk/include/clang/AST/CommentParser.h Tue Jul 24 12:52:18 2012
@@ -27,6 +27,8 @@
 
 /// Doxygen comment parser.
 class Parser {
+  friend class TextTokenRetokenizer;
+
   Lexer &L;
 
   Sema &S;

Modified: cfe/trunk/lib/AST/CommentParser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/CommentParser.cpp?rev=160680&r1=160679&r2=160680&view=diff
==============================================================================
--- cfe/trunk/lib/AST/CommentParser.cpp (original)
+++ cfe/trunk/lib/AST/CommentParser.cpp Tue Jul 24 12:52:18 2012
@@ -19,8 +19,8 @@
 /// Re-lexes a sequence of tok::text tokens.
 class TextTokenRetokenizer {
   llvm::BumpPtrAllocator &Allocator;
-  static const unsigned MaxTokens = 16;
-  SmallVector<Token, MaxTokens> Toks;
+  Parser &P;
+  SmallVector<Token, 16> Toks;
 
   struct Position {
     unsigned CurToken;
@@ -39,7 +39,7 @@
 
   /// Sets up the buffer pointers to point to current token.
   void setupBuffer() {
-    assert(Pos.CurToken < Toks.size());
+    assert(!isEnd());
     const Token &Tok = Toks[Pos.CurToken];
 
     Pos.BufferStart = Tok.getText().begin();
@@ -65,11 +65,27 @@
     Pos.BufferPtr++;
     if (Pos.BufferPtr == Pos.BufferEnd) {
       Pos.CurToken++;
-      if (Pos.CurToken < Toks.size())
+      if (isEnd() && addToken()) {
+        assert(!isEnd());
         setupBuffer();
+      }
     }
   }
 
+  /// Add a token.
+  /// Returns true on success, false if there are no interesting tokens to
+  /// fetch from lexer.
+  bool addToken() {
+    if (P.Tok.isNot(tok::text))
+      return false;
+
+    Toks.push_back(P.Tok);
+    P.consumeToken();
+    if (Toks.size() == 1)
+      setupBuffer();
+    return true;
+  }
+
   static bool isWhitespace(char C) {
     return C == ' ' || C == '\n' || C == '\r' ||
            C == '\t' || C == '\f' || C == '\v';
@@ -100,22 +116,10 @@
   }
 
 public:
-  TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator):
-      Allocator(Allocator) {
+  TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
+      Allocator(Allocator), P(P) {
     Pos.CurToken = 0;
-  }
-
-  /// Add a token.
-  /// Returns true on success, false if it seems like we have enough tokens.
-  bool addToken(const Token &Tok) {
-    assert(Tok.is(tok::text));
-    if (Toks.size() >= MaxTokens)
-      return false;
-
-    Toks.push_back(Tok);
-    if (Toks.size() == 1)
-      setupBuffer();
-    return true;
+    addToken();
   }
 
   /// Extract a word -- sequence of non-whitespace characters.
@@ -199,23 +203,27 @@
     return true;
   }
 
-  /// Return a text token.  Useful to take tokens back.
-  bool lexText(Token &Tok) {
+  /// Put back tokens that we didn't consume.
+  void putBackLeftoverTokens() {
     if (isEnd())
-      return false;
+      return;
 
-    if (Pos.BufferPtr != Pos.BufferStart)
-      formTokenWithChars(Tok, getSourceLocation(),
+    bool HavePartialTok = false;
+    Token PartialTok;
+    if (Pos.BufferPtr != Pos.BufferStart) {
+      formTokenWithChars(PartialTok, getSourceLocation(),
                          Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
                          StringRef(Pos.BufferPtr,
                                    Pos.BufferEnd - Pos.BufferPtr));
-    else
-      Tok = Toks[Pos.CurToken];
+      HavePartialTok = true;
+      Pos.CurToken++;
+    }
 
-    Pos.CurToken++;
-    if (Pos.CurToken < Toks.size())
-      setupBuffer();
-    return true;
+    P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
+    Pos.CurToken = Toks.size();
+
+    if (HavePartialTok)
+      P.putBack(PartialTok);
   }
 };
 
@@ -296,24 +304,14 @@
   if (IsParam || NumArgs > 0) {
     // In order to parse command arguments we need to retokenize a few
     // following text tokens.
-    TextTokenRetokenizer Retokenizer(Allocator);
-    while (Tok.is(tok::text)) {
-      if (Retokenizer.addToken(Tok))
-        consumeToken();
-    }
+    TextTokenRetokenizer Retokenizer(Allocator, *this);
 
     if (IsParam)
       PC = parseParamCommandArgs(PC, Retokenizer);
     else
       BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);
 
-    // Put back tokens we didn't use.
-    SmallVector<Token, 16> TextToks;
-    Token Text;
-    while (Retokenizer.lexText(Text)) {
-      TextToks.push_back(Text);
-    }
-    putBack(TextToks);
+    Retokenizer.putBackLeftoverTokens();
   }
 
   BlockContentComment *Block = parseParagraphOrBlockCommand();
@@ -331,11 +329,7 @@
   const Token CommandTok = Tok;
   consumeToken();
 
-  TextTokenRetokenizer Retokenizer(Allocator);
-  while (Tok.is(tok::text)) {
-    if (Retokenizer.addToken(Tok))
-      consumeToken();
-  }
+  TextTokenRetokenizer Retokenizer(Allocator, *this);
 
   Token ArgTok;
   bool ArgTokValid = Retokenizer.lexWord(ArgTok);
@@ -354,9 +348,7 @@
                               CommandTok.getCommandName());
   }
 
-  Token Text;
-  while (Retokenizer.lexText(Text))
-    putBack(Text);
+  Retokenizer.putBackLeftoverTokens();
 
   return IC;
 }





More information about the cfe-commits mailing list