r182568 - Expand parsing of braced init lists.

Manuel Klimek klimek at google.com
Thu May 23 02:41:44 PDT 2013


Author: klimek
Date: Thu May 23 04:41:43 2013
New Revision: 182568

URL: http://llvm.org/viewvc/llvm-project?rev=182568&view=rev
Log:
Expand parsing of braced init lists.

Allows formatting of C++11 braced init list constructs, like:
vector<int> v { 1, 2, 3 };
f({ 1, 2 });

This involves some changes to how tokens are handled in the
UnwrappedLineParser. Note that we have a plan to evolve the
design of the token flow into one where we create all tokens
up-front and then annotate them in the various layers (as we
currently already have to create all tokens at once anyway, the
current abstraction does not help). Thus, this introduces
FIXMEs towards that goal.

Modified:
    cfe/trunk/lib/Format/UnwrappedLineParser.cpp
    cfe/trunk/lib/Format/UnwrappedLineParser.h
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=182568&r1=182567&r2=182568&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Thu May 23 04:41:43 2013
@@ -72,6 +72,15 @@ public:
     return Token;
   }
 
+  virtual unsigned getPosition() {
+    return PreviousTokenSource->getPosition();
+  }
+
+  virtual FormatToken setPosition(unsigned Position) {
+    Token = PreviousTokenSource->setPosition(Position);
+    return Token;
+  }
+
 private:
   bool eof() { return Token.HasUnescapedNewline; }
 
@@ -124,15 +133,49 @@ private:
   UnwrappedLine *PreBlockLine;
 };
 
+class IndexedTokenSource : public FormatTokenSource {
+public:
+  IndexedTokenSource(ArrayRef<FormatToken> Tokens)
+      : Tokens(Tokens), Position(-1) {}
+
+  virtual FormatToken getNextToken() {
+    ++Position;
+    return Tokens[Position];
+  }
+
+  virtual unsigned getPosition() {
+    assert(Position >= 0);
+    return Position;
+  }
+
+  virtual FormatToken setPosition(unsigned P) {
+    Position = P;
+    return Tokens[Position];
+  }
+
+private:
+  ArrayRef<FormatToken> Tokens;
+  int Position;
+};
+
 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                          FormatTokenSource &Tokens,
                                          UnwrappedLineConsumer &Callback)
     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
       CurrentLines(&Lines), StructuralError(false), Style(Style),
-      Tokens(&Tokens), Callback(Callback) {}
+      Tokens(NULL), Callback(Callback) {
+  FormatToken Tok;
+  do {
+    Tok = Tokens.getNextToken();
+    AllTokens.push_back(Tok);
+  } while (Tok.Tok.isNot(tok::eof));
+  LBraces.resize(AllTokens.size(), BS_Unknown);
+}
 
 bool UnwrappedLineParser::parse() {
   DEBUG(llvm::dbgs() << "----\n");
+  IndexedTokenSource TokenSource(AllTokens);
+  Tokens = &TokenSource;
   readToken();
   parseFile();
   for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end();
@@ -183,6 +226,68 @@ void UnwrappedLineParser::parseLevel(boo
   } while (!eof());
 }
 
+void UnwrappedLineParser::calculateBraceTypes() {
+  // We'll parse forward through the tokens until we hit
+  // a closing brace or eof - note that getNextToken() will
+  // parse macros, so this will magically work inside macro
+  // definitions, too.
+  unsigned StoredPosition = Tokens->getPosition();
+  unsigned Position = StoredPosition;
+  FormatToken Tok = FormatTok;
+  // Keep a stack of positions of lbrace tokens. We will
+  // update information about whether an lbrace starts a
+  // braced init list or a different block during the loop.
+  SmallVector<unsigned, 8> LBraceStack;
+  assert(Tok.Tok.is(tok::l_brace));
+  do {
+    FormatToken NextTok = Tokens->getNextToken();
+    switch (Tok.Tok.getKind()) {
+    case tok::l_brace:
+      LBraceStack.push_back(Position);
+      break;
+    case tok::r_brace:
+      if (!LBraceStack.empty()) {
+        if (LBraces[LBraceStack.back()] == BS_Unknown) {
+          // If there is a comma, semicolon or right paren after the closing
+          // brace, we assume this is a braced initializer list.
+
+          // FIXME: Note that this currently works only because we do not
+          // use the brace information while inside a braced init list.
+          // Thus, if the parent is a braced init list, we consider all
+          // brace blocks inside it braced init list. That works good enough
+          // for now, but we will need to fix it to correctly handle lambdas.
+          if (NextTok.Tok.is(tok::comma) || NextTok.Tok.is(tok::semi) ||
+              NextTok.Tok.is(tok::r_paren))
+            LBraces[LBraceStack.back()] = BS_BracedInit;
+          else
+            LBraces[LBraceStack.back()] = BS_Block;
+        }
+        LBraceStack.pop_back();
+      }
+      break;
+    case tok::semi:
+    case tok::kw_if:
+    case tok::kw_while:
+    case tok::kw_for:
+    case tok::kw_switch:
+    case tok::kw_try:
+      if (!LBraceStack.empty()) 
+        LBraces[LBraceStack.back()] = BS_Block;
+      break;
+    default:
+      break;
+    }
+    Tok = NextTok;
+    ++Position;
+  } while (Tok.Tok.isNot(tok::eof));
+  // Assume other blocks for all unclosed opening braces.
+  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
+    if (LBraces[LBraceStack[i]] == BS_Unknown)
+      LBraces[LBraceStack[i]] = BS_Block;
+  }
+  FormatTok = Tokens->setPosition(StoredPosition);
+}
+
 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
                                      unsigned AddLevels) {
   assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
@@ -394,17 +499,21 @@ void UnwrappedLineParser::parseStructura
       parseParens();
       break;
     case tok::l_brace:
-      // A block outside of parentheses must be the last part of a
-      // structural element.
-      // FIXME: Figure out cases where this is not true, and add projections for
-      // them (the one we know is missing are lambdas).
-      if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
-          Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
+      if (!tryToParseBracedList()) {
+        // A block outside of parentheses must be the last part of a
+        // structural element.
+        // FIXME: Figure out cases where this is not true, and add projections
+        // for them (the one we know is missing are lambdas).
+        if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
+            Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
+          addUnwrappedLine();
+        parseBlock(/*MustBeDeclaration=*/ false);
         addUnwrappedLine();
-
-      parseBlock(/*MustBeDeclaration=*/ false);
-      addUnwrappedLine();
-      return;
+        return;
+      }
+      // Otherwise this was a braced init list, and the structural
+      // element continues.
+      break;
     case tok::identifier:
       nextToken();
       if (Line->Tokens.size() == 1) {
@@ -436,6 +545,16 @@ void UnwrappedLineParser::parseStructura
   } while (!eof());
 }
 
+bool UnwrappedLineParser::tryToParseBracedList() {
+  if (LBraces[Tokens->getPosition()] == BS_Unknown)
+    calculateBraceTypes();
+  assert(LBraces[Tokens->getPosition()] != BS_Unknown);
+  if (LBraces[Tokens->getPosition()] == BS_Block)
+    return false;
+  parseBracedList();
+  return true;
+}
+
 void UnwrappedLineParser::parseBracedList() {
   nextToken();
 
@@ -517,13 +636,15 @@ void UnwrappedLineParser::parseParens()
       nextToken();
       return;
     case tok::l_brace: {
-      nextToken();
-      ScopedLineState LineState(*this);
-      ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
-                                              /*MustBeDeclaration=*/ false);
-      Line->Level += 1;
-      parseLevel(/*HasOpeningBrace=*/ true);
-      Line->Level -= 1;
+      if (!tryToParseBracedList()) {
+        nextToken();
+        ScopedLineState LineState(*this);
+        ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
+                                                /*MustBeDeclaration=*/ false);
+        Line->Level += 1;
+        parseLevel(/*HasOpeningBrace=*/ true);
+        Line->Level -= 1;
+      }
       break;
     }
     case tok::at:

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=182568&r1=182567&r2=182568&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.h Thu May 23 04:41:43 2013
@@ -122,6 +122,12 @@ public:
   virtual ~FormatTokenSource() {
   }
   virtual FormatToken getNextToken() = 0;
+
+  // FIXME: This interface will become an implementation detail of
+  // the UnwrappedLineParser once we switch to generate all tokens
+  // up-front.
+  virtual unsigned getPosition() { return 0; }
+  virtual FormatToken setPosition(unsigned Position) { assert(false); }
 };
 
 class UnwrappedLineParser {
@@ -140,6 +146,7 @@ private:
   void parsePPDefine();
   void parsePPUnknown();
   void parseStructuralElement();
+  bool tryToParseBracedList();
   void parseBracedList();
   void parseReturn();
   void parseParens();
@@ -163,6 +170,14 @@ private:
   void readToken();
   void flushComments(bool NewlineBeforeNext);
   void pushToken(const FormatToken &Tok);
+  void calculateBraceTypes();
+
+  // Represents what type of block a left brace opens.
+  enum LBraceState {
+    BS_Unknown,
+    BS_Block,
+    BS_BracedInit
+  };
 
   // FIXME: We are constantly running into bugs where Line.Level is incorrectly
   // subtracted from beyond 0. Introduce a method to subtract from Line.Level
@@ -203,6 +218,16 @@ private:
   FormatTokenSource *Tokens;
   UnwrappedLineConsumer &Callback;
 
+  // FIXME: This is a temporary measure until we have reworked the ownership
+  // of the format tokens. The goal is to have the actual tokens created and
+  // owned outside of and handed into the UnwrappedLineParser.
+  SmallVector<FormatToken, 16> AllTokens;
+
+  // FIXME: Currently we cannot store attributes with tokens, as we treat
+  // them as read-only; thus, we now store the brace state indexed by the
+  // position of the token in the stream (see \c AllTokens).
+  SmallVector<LBraceState, 16> LBraces;
+
   friend class ScopedLineState;
 };
 

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=182568&r1=182567&r2=182568&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Thu May 23 04:41:43 2013
@@ -2706,7 +2706,7 @@ TEST_F(FormatTest, UnderstandsUsesOfStar
   verifyIndependentOfContext("Type **A = static_cast<Type **>(P);");
   verifyGoogleFormat("Type** A = static_cast<Type**>(P);");
   // FIXME: The newline is wrong.
-  verifyFormat("auto a = [](int **&, int ***) {}\n;");
+  verifyFormat("auto a = [](int **&, int ***) {};");
 
   verifyIndependentOfContext("InvalidRegions[*R] = 0;");
 
@@ -3100,6 +3100,14 @@ TEST_F(FormatTest, LayoutBraceInitialize
   verifyFormat("return (a)(b) { 1, 2, 3 };");
 }
 
+TEST_F(FormatTest, LayoutCxx11ConstructorBraceInitializers) {
+    verifyFormat("vector<int> x { 1, 2, 3, 4 };");
+    verifyFormat("vector<T> x { {}, {}, {}, {} };");
+    verifyFormat("f({ 1, 2 });");
+    verifyFormat("auto v = Foo { 1 };");
+    verifyFormat("f({ 1, 2 }, { { 2, 3 }, { 4, 5 } }, c, { d });");
+}
+
 TEST_F(FormatTest, LayoutTokensFollowingBlockInParentheses) {
   // FIXME: This is bad, find a better and more generic solution.
   verifyFormat(
@@ -3859,8 +3867,7 @@ TEST_F(FormatTest, ObjCLiterals) {
                "{ @2 : @1 }\n"
                "}");
   verifyFormat("@{ @\"one\" : @\n"
-               "{ @2 : @1 }\n"
-               ",\n"
+               "{ @2 : @1 },\n"
                "}");
 
   verifyFormat("@{ 1 > 2 ? @\"one\" : @\"two\" : 1 > 2 ? @1 : @2 }");





More information about the cfe-commits mailing list