r182572 - Use a SourceRange for the whitespace location in FormatToken.

Thu May 23 03:56:37 PDT 2013

Author: klimek
Date: Thu May 23 05:56:37 2013
New Revision: 182572

URL: http://llvm.org/viewvc/llvm-project?rev=182572&view=rev
Log:
Use a SourceRange for the whitespace location in FormatToken.

Replaces the use of WhitespaceStart + WhitspaceLength.
This made a bug in the formatter obvous where we would incorrectly
calculate the next column.

FIXME: There's a similar bug left regarding TokenLength. We should
probably also move to have a TokenRange instead.

Modified:
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/lib/Format/UnwrappedLineParser.cpp
    cfe/trunk/lib/Format/UnwrappedLineParser.h
    cfe/trunk/lib/Format/WhitespaceManager.cpp
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=182572&r1=182571&r2=182572&view=diff
==============================================================================

--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Thu May 23 05:56:37 2013
@@ -470,8 +470,13 @@ private:
     const AnnotatedToken &Previous = *State.NextToken->Parent;
 
     if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) {
-      State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
-                      State.NextToken->FormatTok.TokenLength;
+      // FIXME: Is this correct?
+      int WhitespaceLength =
+          SourceMgr.getSpellingColumnNumber(
+              State.NextToken->FormatTok.WhitespaceRange.getEnd()) -
+          SourceMgr.getSpellingColumnNumber(
+              State.NextToken->FormatTok.WhitespaceRange.getBegin());
+      State.Column += WhitespaceLength + State.NextToken->FormatTok.TokenLength;
       if (State.NextToken->Children.empty())
         State.NextToken = NULL;
       else
@@ -1078,9 +1083,9 @@ public:
   virtual FormatToken getNextToken() {
     if (GreaterStashed) {
       FormatTok.NewlinesBefore = 0;
-      FormatTok.WhiteSpaceStart =
+      SourceLocation GreaterLocation =
           FormatTok.Tok.getLocation().getLocWithOffset(1);
-      FormatTok.WhiteSpaceLength = 0;
+      FormatTok.WhitespaceRange = SourceRange(GreaterLocation, GreaterLocation);
       GreaterStashed = false;
       return FormatTok;
     }
@@ -1088,23 +1093,27 @@ public:
     FormatTok = FormatToken();
     Lex.LexFromRawLexer(FormatTok.Tok);
     StringRef Text = rawTokenText(FormatTok.Tok);
-    FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
-    if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
+    SourceLocation WhitespaceStart = FormatTok.Tok.getLocation();
+    if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
       FormatTok.IsFirst = true;
 
     // Consume and record whitespace until we find a significant token.
+    unsigned WhitespaceLength = 0;
     while (FormatTok.Tok.is(tok::unknown)) {
       unsigned Newlines = Text.count('\n');
       if (Newlines > 0)
-        FormatTok.LastNewlineOffset =
-            FormatTok.WhiteSpaceLength + Text.rfind('\n') + 1;
+        FormatTok.LastNewlineOffset = WhitespaceLength + Text.rfind('\n') + 1;
       unsigned EscapedNewlines = Text.count("\\\n");
       FormatTok.NewlinesBefore += Newlines;
       FormatTok.HasUnescapedNewline |= EscapedNewlines != Newlines;
-      FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
+      WhitespaceLength += FormatTok.Tok.getLength();
 
-      if (FormatTok.Tok.is(tok::eof))
+      if (FormatTok.Tok.is(tok::eof)) {
+        FormatTok.WhitespaceRange =
+            SourceRange(WhitespaceStart,
+                        WhitespaceStart.getLocWithOffset(WhitespaceLength));
         return FormatTok;
+      }
       Lex.LexFromRawLexer(FormatTok.Tok);
       Text = rawTokenText(FormatTok.Tok);
     }
@@ -1126,7 +1135,7 @@ public:
     unsigned i = 0;
     while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
       // FIXME: ++FormatTok.NewlinesBefore is missing...
-      FormatTok.WhiteSpaceLength += 2;
+      WhitespaceLength += 2;
       FormatTok.TokenLength -= 2;
       i += 2;
     }
@@ -1143,6 +1152,8 @@ public:
       GreaterStashed = true;
     }
 
+    FormatTok.WhitespaceRange = SourceRange(
+        WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
     return FormatTok;
   }
 
@@ -1235,7 +1246,7 @@ public:
       } else if (TheLine.Type != LT_Invalid &&
                  (WasMoved || FormatPPDirective || touchesLine(TheLine))) {
         unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
-        if (FirstTok.WhiteSpaceStart.isValid() &&
+        if (FirstTok.WhitespaceRange.isValid() &&
             // Insert a break even if there is a structural error in case where
             // we break apart a line consisting of multiple unwrapped lines.
             (FirstTok.NewlinesBefore == 0 || !StructuralError)) {
@@ -1300,8 +1311,11 @@ private:
       AnnotatedToken *Tok = &AnnotatedLines[i].First.Children[0];
       while (!Tok->Children.empty()) {
         if (Tok->Type == TT_PointerOrReference) {
-          bool SpacesBefore = Tok->FormatTok.WhiteSpaceLength > 0;
-          bool SpacesAfter = Tok->Children[0].FormatTok.WhiteSpaceLength > 0;
+          bool SpacesBefore = Tok->FormatTok.WhitespaceRange.getBegin() !=
+                              Tok->FormatTok.WhitespaceRange.getEnd();
+          bool SpacesAfter =
+              Tok->Children[0].FormatTok.WhitespaceRange.getBegin() !=
+              Tok->Children[0].FormatTok.WhitespaceRange.getEnd();
           if (SpacesBefore && !SpacesAfter)
             ++CountBoundToVariable;
           else if (!SpacesBefore && SpacesAfter)
@@ -1310,7 +1324,8 @@ private:
 
         if (Tok->Type == TT_TemplateCloser &&
             Tok->Parent->Type == TT_TemplateCloser &&
-            Tok->FormatTok.WhiteSpaceLength == 0)
+            Tok->FormatTok.WhitespaceRange.getBegin() ==
+                Tok->FormatTok.WhitespaceRange.getEnd())
           HasCpp03IncompatibleFormat = true;
         Tok = &Tok->Children[0];
       }
@@ -1509,7 +1524,8 @@ private:
     const FormatToken *First = &TheLine.First.FormatTok;
     const FormatToken *Last = &TheLine.Last->FormatTok;
     CharSourceRange LineRange = CharSourceRange::getCharRange(
-        First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset),
+        First->WhitespaceRange.getBegin().getLocWithOffset(
+            First->LastNewlineOffset),
         Last->Tok.getLocation().getLocWithOffset(Last->TokenLength - 1));
     return touchesRanges(LineRange);
   }
@@ -1528,8 +1544,9 @@ private:
   bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
     const FormatToken *First = &TheLine.First.FormatTok;
     CharSourceRange LineRange = CharSourceRange::getCharRange(
-        First->WhiteSpaceStart,
-        First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset));
+        First->WhitespaceRange.getBegin(),
+        First->WhitespaceRange.getBegin().getLocWithOffset(
+            First->LastNewlineOffset));
     return touchesRanges(LineRange);
   }
 

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=182572&r1=182571&r2=182572&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Thu May 23 05:56:37 2013
@@ -339,7 +339,8 @@ void UnwrappedLineParser::parsePPDefine(
   }
   nextToken();
   if (FormatTok.Tok.getKind() == tok::l_paren &&
-      FormatTok.WhiteSpaceLength == 0) {
+      FormatTok.WhitespaceRange.getBegin() ==
+          FormatTok.WhitespaceRange.getEnd()) {
     parseParens();
   }
   addUnwrappedLine();

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=182572&r1=182571&r2=182572&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.h Thu May 23 05:56:37 2013
@@ -29,7 +29,7 @@ namespace format {
 /// whitespace characters preceeding it.
 struct FormatToken {
   FormatToken()
-      : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0),
+      : NewlinesBefore(0), HasUnescapedNewline(false),
         LastNewlineOffset(0), TokenLength(0), IsFirst(false),
         MustBreakBefore(false), TrailingWhiteSpaceLength(0) {}
 
@@ -46,15 +46,8 @@ struct FormatToken {
   /// Token.
   bool HasUnescapedNewline;
 
-  /// \brief The location of the start of the whitespace immediately preceeding
-  /// the \c Token.
-  ///
-  /// Used together with \c WhiteSpaceLength to create a \c Replacement.
-  SourceLocation WhiteSpaceStart;
-
-  /// \brief The length in characters of the whitespace immediately preceeding
-  /// the \c Token.
-  unsigned WhiteSpaceLength;
+  /// \brief The range of the whitespace immediately preceeding the \c Token.
+  SourceRange WhitespaceRange;
 
   /// \brief The offset just past the last '\n' in this token's leading
   /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
@@ -83,7 +76,7 @@ struct FormatToken {
   /// This can be different to Tok.getLocation(), which includes leading escaped
   /// newlines.
   SourceLocation getStartOfNonWhitespace() const {
-    return WhiteSpaceStart.getLocWithOffset(WhiteSpaceLength);
+    return WhitespaceRange.getEnd();
   }
 };
 

Modified: cfe/trunk/lib/Format/WhitespaceManager.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/WhitespaceManager.cpp?rev=182572&r1=182571&r2=182572&view=diff
==============================================================================
--- cfe/trunk/lib/Format/WhitespaceManager.cpp (original)
+++ cfe/trunk/lib/Format/WhitespaceManager.cpp Thu May 23 05:56:37 2013
@@ -43,9 +43,7 @@ void WhitespaceManager::replaceWhitespac
                                           unsigned StartOfTokenColumn,
                                           bool InPPDirective) {
   Changes.push_back(Change(
-      true, SourceRange(Tok.FormatTok.WhiteSpaceStart,
-                        Tok.FormatTok.WhiteSpaceStart.getLocWithOffset(
-                            Tok.FormatTok.WhiteSpaceLength)),
+      true, Tok.FormatTok.WhitespaceRange,
       Spaces, StartOfTokenColumn, Newlines, "", "", Tok.FormatTok.Tok.getKind(),
       InPPDirective && !Tok.FormatTok.IsFirst));
 
@@ -65,14 +63,11 @@ void WhitespaceManager::replaceWhitespac
 
 void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
                                             bool InPPDirective) {
-  Changes.push_back(Change(
-      false,
-      SourceRange(Tok.WhiteSpaceStart,
-                  Tok.WhiteSpaceStart.getLocWithOffset(Tok.WhiteSpaceLength)),
-      Tok.WhiteSpaceLength - Tok.NewlinesBefore,
-      SourceMgr.getSpellingColumnNumber(Tok.Tok.getLocation()) - 1,
-      Tok.NewlinesBefore, "", "", Tok.Tok.getKind(),
-      InPPDirective && !Tok.IsFirst));
+  Changes.push_back(
+      Change(false, Tok.WhitespaceRange, /*Spaces=*/0,
+             SourceMgr.getSpellingColumnNumber(Tok.Tok.getLocation()) - 1,
+             Tok.NewlinesBefore, "", "", Tok.Tok.getKind(),
+             InPPDirective && !Tok.IsFirst));
 }
 
 void WhitespaceManager::breakToken(const FormatToken &Tok, unsigned Offset,

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=182572&r1=182571&r2=182572&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Thu May 23 05:56:37 2013
@@ -567,6 +567,12 @@ TEST_F(FormatTest, UnderstandsSingleLine
                "#include \"a/b/c\" // comment");
   verifyFormat("#include <a>     // comment\n"
                "#include <a/b/c> // comment");
+  EXPECT_EQ("#include \\\n"
+            "  \"a\"            // comment\n"
+            "#include \"a/b/c\" // comment",
+            format("#include \\\n"
+                   "  \"a\" // comment\n"
+                   "#include \"a/b/c\" // comment"));
 
   verifyFormat("enum E {\n"
                "  // comment\n"