[cfe-commits] r171640 - in /cfe/trunk: lib/Format/Format.cpp lib/Format/UnwrappedLineParser.cpp lib/Format/UnwrappedLineParser.h unittests/Format/FormatTest.cpp

Sat Jan 5 14:56:07 PST 2013

Author: klimek
Date: Sat Jan  5 16:56:06 2013
New Revision: 171640

URL: http://llvm.org/viewvc/llvm-project?rev=171640&view=rev
Log:
Fixes parsing of hash tokens in the middle of a line.

To parse # correctly, we need to know whether it is the first token in a
line - we can deduct this either from the whitespace or seeing that the
token is the first in the file - we already calculate this information.
This patch moves the identification of the first token into the
getNextToken method and stores it inside the FormatToken, so the
UnwrappedLineParser can stay independent of the SourceManager.

Modified:
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/lib/Format/UnwrappedLineParser.cpp
    cfe/trunk/lib/Format/UnwrappedLineParser.h
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=171640&r1=171639&r2=171640&view=diff
==============================================================================

--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Sat Jan  5 16:56:06 2013
@@ -483,8 +483,7 @@
 
     unsigned Newlines =
         std::min(Token.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
-    unsigned Offset = SourceMgr.getFileOffset(Token.WhiteSpaceStart);
-    if (Newlines == 0 && Offset != 0)
+    if (Newlines == 0 && !Token.IsFirst)
       Newlines = 1;
     unsigned Indent = Line.Level * 2;
     if ((Token.Tok.is(tok::kw_public) || Token.Tok.is(tok::kw_protected) ||
@@ -685,9 +684,10 @@
       next();
       if (Index >= Tokens.size())
         return;
-      // It is the responsibility of the UnwrappedLineParser to make sure
-      // this sequence is not produced inside an unwrapped line.
-      assert(Tokens[Index].Tok.getIdentifierInfo() != NULL);
+      // Hashes in the middle of a line can lead to any strange token
+      // sequence.
+      if (Tokens[Index].Tok.getIdentifierInfo() == NULL)
+        return;
       switch (Tokens[Index].Tok.getIdentifierInfo()->getPPKeywordID()) {
       case tok::pp_include:
       case tok::pp_import:
@@ -1033,6 +1033,8 @@
     Lex.LexFromRawLexer(FormatTok.Tok);
     StringRef Text = tokenText(FormatTok.Tok);
     FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
+    if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
+      FormatTok.IsFirst = true;
 
     // Consume and record whitespace until we find a significant token.
     while (FormatTok.Tok.is(tok::unknown)) {

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=171640&r1=171639&r2=171640&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Sat Jan  5 16:56:06 2013
@@ -470,7 +470,9 @@
 
 void UnwrappedLineParser::readToken() {
   FormatTok = Tokens->getNextToken();
-  while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash)) {
+  while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash) &&
+         ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
+          FormatTok.IsFirst)) {
     // FIXME: This is incorrect - the correct way is to create a
     // data structure that will construct the parts around the preprocessor
     // directive as a structured \c UnwrappedLine.

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=171640&r1=171639&r2=171640&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.h Sat Jan  5 16:56:06 2013
@@ -31,7 +31,8 @@
 /// whitespace characters preceeding it.
 struct FormatToken {
   FormatToken()
-      : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0) {
+      : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0),
+        IsFirst(false) {
   }
 
   /// \brief The \c Token.
@@ -56,6 +57,9 @@
   /// \brief The length in characters of the whitespace immediately preceeding
   /// the \c Token.
   unsigned WhiteSpaceLength;
+
+  /// \brief Indicates that this is the first token.
+  bool IsFirst;
 };
 
 /// \brief An unwrapped line is a sequence of \c Token, that we would like to

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=171640&r1=171639&r2=171640&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Sat Jan  5 16:56:06 2013
@@ -470,6 +470,10 @@
   EXPECT_EQ("{\n  {\n#define A\n  }\n}", format("{{\n#define A\n}}"));
 }
 
+TEST_F(FormatTest, FormatHashIfNotAtStartOfLine) {
+  verifyFormat("{\n  {\n    a #c;\n  }\n}");
+}
+
 // FIXME: write test for unbalanced braces in macros...
 // FIXME: test # inside a normal statement (like {#define A b})