r293898 - [clang-format] Don't reflow across comment pragmas.

Thu Feb 2 07:32:20 PST 2017

Author: krasimir
Date: Thu Feb  2 09:32:19 2017
New Revision: 293898

URL: http://llvm.org/viewvc/llvm-project?rev=293898&view=rev
Log:
[clang-format] Don't reflow across comment pragmas.

Summary:
The comment reflower wasn't taking comment pragmas as reflow stoppers. This patch fixes that.

source:
```
// long long long long
// IWYU pragma:
```
format with column limit  = 20 before:
```
// long long long
// long IWYU pragma:
```
format with column limit  = 20 after:
```
// long long long
// long
// IWYU pragma:
```

Reviewers: djasper

Reviewed By: djasper

Subscribers: cfe-commits, klimek

Differential Revision: https://reviews.llvm.org/D29450

Modified:
    cfe/trunk/lib/Format/BreakableToken.cpp
    cfe/trunk/lib/Format/BreakableToken.h
    cfe/trunk/lib/Format/ContinuationIndenter.cpp
    cfe/trunk/lib/Format/UnwrappedLineParser.cpp
    cfe/trunk/lib/Format/UnwrappedLineParser.h
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/lib/Format/BreakableToken.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=293898&r1=293897&r2=293898&view=diff
==============================================================================

--- cfe/trunk/lib/Format/BreakableToken.cpp (original)
+++ cfe/trunk/lib/Format/BreakableToken.cpp Thu Feb  2 09:32:19 2017
@@ -321,13 +321,6 @@ static bool mayReflowContent(StringRef C
          (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
 }
 
-bool BreakableComment::mayReflow(unsigned LineIndex) const {
-  return LineIndex > 0 && mayReflowContent(Content[LineIndex]) &&
-         !Tok.Finalized && !switchesFormatting(tokenAt(LineIndex)) &&
-         (!Tok.is(TT_LineComment) ||
-          OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]);
-}
-
 BreakableBlockComment::BreakableBlockComment(
     const FormatToken &Token, unsigned StartColumn,
     unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
@@ -501,8 +494,9 @@ void BreakableBlockComment::insertBreak(
 BreakableToken::Split BreakableBlockComment::getSplitBefore(
     unsigned LineIndex,
     unsigned PreviousEndColumn,
-    unsigned ColumnLimit) const {
-  if (!mayReflow(LineIndex))
+    unsigned ColumnLimit,
+    llvm::Regex &CommentPragmasRegex) const {
+  if (!mayReflow(LineIndex, CommentPragmasRegex))
     return Split(StringRef::npos, 0);
   StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
   return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn,
@@ -622,6 +616,19 @@ void BreakableBlockComment::replaceWhite
       InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
 }
 
+bool BreakableBlockComment::mayReflow(unsigned LineIndex,
+                                      llvm::Regex &CommentPragmasRegex) const {
+  // Content[LineIndex] may exclude the indent after the '*' decoration. In that
+  // case, we compute the start of the comment pragma manually.
+  StringRef IndentContent = Content[LineIndex];
+  if (Lines[LineIndex].ltrim(Blanks).startswith("*")) {
+    IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
+  }
+  return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
+         mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
+         !switchesFormatting(tokenAt(LineIndex));
+}
+
 unsigned
 BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
                                              unsigned TailOffset) const {
@@ -748,10 +755,10 @@ void BreakableLineCommentSection::insert
 }
 
 BreakableComment::Split BreakableLineCommentSection::getSplitBefore(
-    unsigned LineIndex,
-    unsigned PreviousEndColumn,
-    unsigned ColumnLimit) const {
-  if (!mayReflow(LineIndex)) return Split(StringRef::npos, 0);
+    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
+    llvm::Regex &CommentPragmasRegex) const {
+  if (!mayReflow(LineIndex, CommentPragmasRegex))
+    return Split(StringRef::npos, 0);
   return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn,
                         ColumnLimit);
 }
@@ -850,6 +857,20 @@ void BreakableLineCommentSection::update
   }
 }
 
+bool BreakableLineCommentSection::mayReflow(
+    unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const {
+  // Line comments have the indent as part of the prefix, so we need to
+  // recompute the start of the line.
+  StringRef IndentContent = Content[LineIndex];
+  if (Lines[LineIndex].startswith("//")) {
+    IndentContent = Lines[LineIndex].substr(2);
+  }
+  return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
+         mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
+         !switchesFormatting(tokenAt(LineIndex)) &&
+         OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
+}
+
 unsigned
 BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
                                                    unsigned TailOffset) const {

Modified: cfe/trunk/lib/Format/BreakableToken.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.h?rev=293898&r1=293897&r2=293898&view=diff
==============================================================================
--- cfe/trunk/lib/Format/BreakableToken.h (original)
+++ cfe/trunk/lib/Format/BreakableToken.h Thu Feb  2 09:32:19 2017
@@ -21,6 +21,7 @@
 #include "Encoding.h"
 #include "TokenAnnotator.h"
 #include "WhitespaceManager.h"
+#include "llvm/Support/Regex.h"
 #include <utility>
 
 namespace clang {
@@ -118,7 +119,8 @@ public:
   /// needs to be reformatted before any breaks are made.
   virtual Split getSplitBefore(unsigned LineIndex,
                                unsigned PreviousEndColumn,
-                               unsigned ColumnLimit) const {
+                               unsigned ColumnLimit,
+                               llvm::Regex& CommentPragmasRegex) const {
     return Split(StringRef::npos, 0);
   }
 
@@ -238,7 +240,8 @@ protected:
 
   // Checks if the content of line LineIndex may be reflown with the previous
   // line.
-  bool mayReflow(unsigned LineIndex) const;
+  virtual bool mayReflow(unsigned LineIndex,
+                         llvm::Regex &CommentPragmasRegex) const = 0;
 
   // Contains the original text of the lines of the block comment.
   //
@@ -307,7 +310,8 @@ public:
   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
                    WhitespaceManager &Whitespaces) override;
   Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
-                       unsigned ColumnLimit) const override;
+                       unsigned ColumnLimit,
+                       llvm::Regex &CommentPragmasRegex) const override;
   unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
                                          unsigned TailOffset,
                                          unsigned PreviousEndColumn,
@@ -317,6 +321,8 @@ public:
                                unsigned ColumnLimit,
                                Split SplitBefore,
                                WhitespaceManager &Whitespaces) override;
+  bool mayReflow(unsigned LineIndex,
+                 llvm::Regex &CommentPragmasRegex) const override;
 
 private:
   // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex].
@@ -371,7 +377,8 @@ public:
   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
                    WhitespaceManager &Whitespaces) override;
   Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
-                       unsigned ColumnLimit) const override;
+                       unsigned ColumnLimit,
+                       llvm::Regex &CommentPragmasRegex) const override;
   unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, unsigned TailOffset,
                                          unsigned PreviousEndColumn,
                                          unsigned ColumnLimit,
@@ -380,6 +387,8 @@ public:
                                unsigned ColumnLimit, Split SplitBefore,
                                WhitespaceManager &Whitespaces) override;
   void updateNextToken(LineState& State) const override;
+  bool mayReflow(unsigned LineIndex,
+                 llvm::Regex &CommentPragmasRegex) const override;
 
 private:
   unsigned getContentStartColumn(unsigned LineIndex,

Modified: cfe/trunk/lib/Format/ContinuationIndenter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/ContinuationIndenter.cpp?rev=293898&r1=293897&r2=293898&view=diff
==============================================================================
--- cfe/trunk/lib/Format/ContinuationIndenter.cpp (original)
+++ cfe/trunk/lib/Format/ContinuationIndenter.cpp Thu Feb  2 09:32:19 2017
@@ -1213,7 +1213,7 @@ unsigned ContinuationIndenter::breakProt
     BreakableToken::Split SplitBefore(StringRef::npos, 0);
     if (ReflowInProgress) {
       SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns,
-                                          RemainingSpace);
+                                          RemainingSpace, CommentPragmasRegex);
     }
     ReflowInProgress = SplitBefore.first != StringRef::npos;
     unsigned TailOffset =

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=293898&r1=293897&r2=293898&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Thu Feb  2 09:32:19 2017
@@ -202,7 +202,8 @@ UnwrappedLineParser::UnwrappedLineParser
                                          ArrayRef<FormatToken *> Tokens,
                                          UnwrappedLineConsumer &Callback)
     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
-      CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
+      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
+      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
 
 void UnwrappedLineParser::reset() {
@@ -2048,10 +2049,18 @@ static bool isLineComment(const FormatTo
 // Checks if \p FormatTok is a line comment that continues the line comment
 // section on \p Line.
 static bool continuesLineComment(const FormatToken &FormatTok,
-                                 const UnwrappedLine &Line) {
+                                 const UnwrappedLine &Line,
+                                 llvm::Regex &CommentPragmasRegex) {
   if (Line.Tokens.empty())
     return false;
 
+  StringRef IndentContent = FormatTok.TokenText;
+  if (FormatTok.TokenText.startswith("//") ||
+      FormatTok.TokenText.startswith("/*"))
+    IndentContent = FormatTok.TokenText.substr(2);
+  if (CommentPragmasRegex.match(IndentContent))
+    return false;
+
   // If Line starts with a line comment, then FormatTok continues the comment
   // section if its original column is greater or equal to the original start
   // column of the line.
@@ -2162,7 +2171,8 @@ void UnwrappedLineParser::flushComments(
     //
     // FIXME: Consider putting separate line comment sections as children to the
     // unwrapped line instead.
-    (*I)->ContinuesLineCommentSection = continuesLineComment(**I, *Line);
+    (*I)->ContinuesLineCommentSection =
+        continuesLineComment(**I, *Line, CommentPragmasRegex);
     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
       addUnwrappedLine();
     pushToken(*I);
@@ -2230,7 +2240,7 @@ void UnwrappedLineParser::readToken() {
     if (!FormatTok->Tok.is(tok::comment))
       return;
     FormatTok->ContinuesLineCommentSection =
-        continuesLineComment(*FormatTok, *Line);
+        continuesLineComment(*FormatTok, *Line, CommentPragmasRegex);
     if (!FormatTok->ContinuesLineCommentSection &&
         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
       CommentsInCurrentLine = false;

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=293898&r1=293897&r2=293898&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.h Thu Feb  2 09:32:19 2017
@@ -19,6 +19,7 @@
 #include "FormatToken.h"
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Format/Format.h"
+#include "llvm/Support/Regex.h"
 #include <list>
 #include <stack>
 
@@ -161,6 +162,8 @@ private:
 
   const FormatStyle &Style;
   const AdditionalKeywords &Keywords;
+  
+  llvm::Regex CommentPragmasRegex;
 
   FormatTokenSource *Tokens;
   UnwrappedLineConsumer &Callback;

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=293898&r1=293897&r2=293898&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Thu Feb  2 09:32:19 2017
@@ -2370,6 +2370,22 @@ TEST_F(FormatTest, ReflowsComments) {
                    "// XXX: long",
                    getLLVMStyleWithColumns(20)));
 
+  // Don't reflow comment pragmas.
+  EXPECT_EQ("// long long long\n"
+            "// long\n"
+            "// IWYU pragma:",
+            format("// long long long long\n"
+                   "// IWYU pragma:",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+            " * long\n"
+            " * IWYU pragma:\n"
+            " */",
+            format("/* long long long long\n"
+                   " * IWYU pragma:\n"
+                   " */",
+                   getLLVMStyleWithColumns(20)));
+
   // Reflow lines that have a non-punctuation character among their first 2
   // characters.
   EXPECT_EQ("// long long long\n"