r308684 - [clang-format] Put '/**' and '*/' on own lines in multiline jsdocs

Thu Jul 20 15:29:39 PDT 2017

Author: krasimir
Date: Thu Jul 20 15:29:39 2017
New Revision: 308684

URL: http://llvm.org/viewvc/llvm-project?rev=308684&view=rev
Log:
[clang-format] Put '/**' and '*/' on own lines in multiline jsdocs

Reviewers: mprobst

Reviewed By: mprobst

Subscribers: cfe-commits, klimek

Differential Revision: https://reviews.llvm.org/D35683

Modified:
    cfe/trunk/lib/Format/BreakableToken.cpp
    cfe/trunk/lib/Format/BreakableToken.h
    cfe/trunk/lib/Format/ContinuationIndenter.cpp
    cfe/trunk/unittests/Format/FormatTestJS.cpp
    cfe/trunk/unittests/Format/FormatTestJava.cpp

Modified: cfe/trunk/lib/Format/BreakableToken.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=308684&r1=308683&r2=308684&view=diff
==============================================================================

--- cfe/trunk/lib/Format/BreakableToken.cpp (original)
+++ cfe/trunk/lib/Format/BreakableToken.cpp Thu Jul 20 15:29:39 2017
@@ -339,7 +339,8 @@ BreakableBlockComment::BreakableBlockCom
     const FormatToken &Token, unsigned StartColumn,
     unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
     encoding::Encoding Encoding, const FormatStyle &Style)
-    : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
+    : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),
+      DelimitersOnNewline(false) {
   assert(Tok.is(TT_BlockComment) &&
          "block comment section must start with a block comment");
 
@@ -430,8 +431,25 @@ BreakableBlockComment::BreakableBlockCom
   IndentAtLineBreak =
       std::max<unsigned>(IndentAtLineBreak, Decoration.size());
 
+  // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
+  if (Style.Language == FormatStyle::LK_JavaScript ||
+      Style.Language == FormatStyle::LK_Java) {
+    if ((Lines[0] == "*" || Lines[0].startswith("* ")) && Lines.size() > 1) {
+      // This is a multiline jsdoc comment.
+      DelimitersOnNewline = true;
+    } else if (Lines[0].startswith("* ") && Lines.size() == 1) {
+      // Detect a long single-line comment, like:
+      // /** long long long */
+      // Below, '2' is the width of '*/'.
+      unsigned EndColumn = ContentColumn[0] + encoding::columnWidthWithTabs(
+          Lines[0], ContentColumn[0], Style.TabWidth, Encoding) + 2;
+      DelimitersOnNewline = EndColumn > Style.ColumnLimit;
+    }
+  }
+
   DEBUG({
     llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
+    llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
     for (size_t i = 0; i < Lines.size(); ++i) {
       llvm::dbgs() << i << " |" << Content[i] << "| "
                    << "CC=" << ContentColumn[i] << "| "
@@ -580,10 +598,22 @@ unsigned BreakableBlockComment::getLineL
     return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
   }
 }
+
 void BreakableBlockComment::replaceWhitespaceBefore(
     unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
     Split SplitBefore, WhitespaceManager &Whitespaces) {
-  if (LineIndex == 0) return;
+  if (LineIndex == 0) {
+    if (DelimitersOnNewline) {
+        // Since we're breaking af index 1 below, the break position and the
+        // break length are the same.
+        size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
+        if (BreakLength != StringRef::npos) {
+          insertBreak(LineIndex, 0, Split(1, BreakLength), Whitespaces);
+          DelimitersOnNewline = true;
+        }
+    }
+    return;
+  }
   StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
   if (SplitBefore.first != StringRef::npos) {
     // Here we need to reflow.
@@ -651,6 +681,15 @@ void BreakableBlockComment::replaceWhite
       InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
 }
 
+BreakableToken::Split BreakableBlockComment::getSplitAfterLastLine(
+    unsigned TailOffset, unsigned ColumnLimit,
+    llvm::Regex &CommentPragmasRegex) const {
+  if (DelimitersOnNewline)
+    return getSplit(Lines.size() - 1, TailOffset, ColumnLimit,
+                    CommentPragmasRegex);
+  return Split(StringRef::npos, 0);
+}
+
 bool BreakableBlockComment::mayReflow(unsigned LineIndex,
                                       llvm::Regex &CommentPragmasRegex) const {
   // Content[LineIndex] may exclude the indent after the '*' decoration. In that

Modified: cfe/trunk/lib/Format/BreakableToken.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.h?rev=308684&r1=308683&r2=308684&view=diff
==============================================================================
--- cfe/trunk/lib/Format/BreakableToken.h (original)
+++ cfe/trunk/lib/Format/BreakableToken.h Thu Jul 20 15:29:39 2017
@@ -64,6 +64,17 @@ struct FormatStyle;
 /// - replaceWhitespaceBefore, for executing the reflow using a whitespace
 ///   manager.
 ///
+/// For tokens that require the whitespace after the last line to be
+/// reformatted, for example in multiline jsdoc comments that require the
+/// trailing '*/' to be on a line of itself, there are analogous operations
+/// that might be executed after the last line has been reformatted:
+/// - getSplitAfterLastLine, for finding a split after the last line that needs
+///   to be reflown,
+/// - getLineLengthAfterSplitAfterLastLine, for calculating the line length in
+///   columns of the remainder of the token, and
+/// - replaceWhitespaceAfterLastLine, for executing the reflow using a
+///   whitespace manager.
+///
 /// FIXME: The interface seems set in stone, so we might want to just pull the
 /// strategy into the class, instead of controlling it from the outside.
 class BreakableToken {
@@ -144,6 +155,38 @@ public:
                                        unsigned ColumnLimit, Split SplitBefore,
                                        WhitespaceManager &Whitespaces) {}
 
+  /// \brief Returns a whitespace range (offset, length) of the content at
+  /// the last line that needs to be reformatted after the last line has been
+  /// reformatted.
+  ///
+  /// A result having offset == StringRef::npos means that no reformat is
+  /// necessary.
+  virtual Split getSplitAfterLastLine(unsigned TailOffset, unsigned ColumnLimit,
+                                      llvm::Regex &CommentPragmasRegex) const {
+    return Split(StringRef::npos, 0);
+  }
+
+  /// \brief Returns the number of columns required to format the piece token
+  /// after the last line after a reformat of the whitespace range \p
+  /// \p SplitAfterLastLine on the last line has been performed.
+  virtual unsigned
+  getLineLengthAfterSplitAfterLastLine(unsigned TailOffset,
+                                       Split SplitAfterLastLine) const {
+    return getLineLengthAfterSplit(getLineCount() - 1,
+                                   TailOffset + SplitAfterLastLine.first +
+                                       SplitAfterLastLine.second,
+                                   StringRef::npos);
+  }
+
+  /// \brief Replaces the whitespace from \p SplitAfterLastLine on the last line
+  /// after the last line has been formatted by performing a reformatting.
+  virtual void replaceWhitespaceAfterLastLine(unsigned TailOffset,
+                                              Split SplitAfterLastLine,
+                                              WhitespaceManager &Whitespaces) {
+    insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine,
+                Whitespaces);
+  }
+
   /// \brief Updates the next token of \p State to the next token after this
   /// one. This can be used when this token manages a set of underlying tokens
   /// as a unit and is responsible for the formatting of the them.
@@ -304,6 +347,9 @@ public:
   void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
                                unsigned ColumnLimit, Split SplitBefore,
                                WhitespaceManager &Whitespaces) override;
+  Split getSplitAfterLastLine(unsigned TailOffset, unsigned ColumnLimit,
+                              llvm::Regex &CommentPragmasRegex) const override;
+
   bool mayReflow(unsigned LineIndex,
                  llvm::Regex &CommentPragmasRegex) const override;
 
@@ -348,6 +394,10 @@ private:
   // If this block comment has decorations, this is the column of the start of
   // the decorations.
   unsigned DecorationColumn;
+
+  // If true, make sure that the opening '/**' and the closing '*/' ends on a
+  // line of itself. Styles like jsdoc require this for multiline comments.
+  bool DelimitersOnNewline;
 };
 
 class BreakableLineCommentSection : public BreakableComment {

Modified: cfe/trunk/lib/Format/ContinuationIndenter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/ContinuationIndenter.cpp?rev=308684&r1=308683&r2=308684&view=diff
==============================================================================
--- cfe/trunk/lib/Format/ContinuationIndenter.cpp (original)
+++ cfe/trunk/lib/Format/ContinuationIndenter.cpp Thu Jul 20 15:29:39 2017
@@ -1314,6 +1314,7 @@ unsigned ContinuationIndenter::breakProt
   bool ReflowInProgress = false;
   unsigned Penalty = 0;
   unsigned RemainingTokenColumns = 0;
+  unsigned TailOffset = 0;
   for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
        LineIndex != EndIndex; ++LineIndex) {
     BreakableToken::Split SplitBefore(StringRef::npos, 0);
@@ -1322,7 +1323,7 @@ unsigned ContinuationIndenter::breakProt
                                           RemainingSpace, CommentPragmasRegex);
     }
     ReflowInProgress = SplitBefore.first != StringRef::npos;
-    unsigned TailOffset =
+    TailOffset =
         ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0;
     if (!DryRun)
       Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns,
@@ -1379,6 +1380,16 @@ unsigned ContinuationIndenter::breakProt
     }
   }
 
+  BreakableToken::Split SplitAfterLastLine = Token->getSplitAfterLastLine(
+      TailOffset, ColumnLimit, CommentPragmasRegex);
+  if (SplitAfterLastLine.first != StringRef::npos) {
+    if (!DryRun)
+      Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine,
+                                            Whitespaces);
+    RemainingTokenColumns = Token->getLineLengthAfterSplitAfterLastLine(
+        TailOffset, SplitAfterLastLine);
+  }
+
   State.Column = RemainingTokenColumns;
 
   if (BreakInserted) {

Modified: cfe/trunk/unittests/Format/FormatTestJS.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTestJS.cpp?rev=308684&r1=308683&r2=308684&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTestJS.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTestJS.cpp Thu Jul 20 15:29:39 2017
@@ -67,6 +67,99 @@ TEST_F(FormatTestJS, BlockComments) {
                "    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa);");
 }
 
+TEST_F(FormatTestJS, JSDocComments) {
+  // Break the first line of a multiline jsdoc comment.
+  EXPECT_EQ("/**\n"
+            " * jsdoc line 1\n"
+            " * jsdoc line 2\n"
+            " */",
+            format("/** jsdoc line 1\n"
+                   " * jsdoc line 2\n"
+                   " */",
+                   getGoogleJSStyleWithColumns(20)));
+  // Both break after '/**' and break the line itself.
+  EXPECT_EQ("/**\n"
+            " * jsdoc line long\n"
+            " * long jsdoc line 2\n"
+            " */",
+            format("/** jsdoc line long long\n"
+                   " * jsdoc line 2\n"
+                   " */",
+                   getGoogleJSStyleWithColumns(20)));
+  // Break a short first line if the ending '*/' is on a newline.
+  EXPECT_EQ("/**\n"
+            " * jsdoc line 1\n"
+            " */",
+            format("/** jsdoc line 1\n"
+                   " */", getGoogleJSStyleWithColumns(20)));
+  // Don't break the first line of a short single line jsdoc comment.
+  EXPECT_EQ("/** jsdoc line 1 */",
+            format("/** jsdoc line 1 */", getGoogleJSStyleWithColumns(20)));
+  // Don't break the first line of a single line jsdoc comment if it just fits
+  // the column limit.
+  EXPECT_EQ("/** jsdoc line 12 */",
+            format("/** jsdoc line 12 */", getGoogleJSStyleWithColumns(20)));
+  // Don't break after '/**' and before '*/' if there is no space between
+  // '/**' and the content.
+  EXPECT_EQ(
+      "/*** nonjsdoc long\n"
+      " * line */",
+      format("/*** nonjsdoc long line */", getGoogleJSStyleWithColumns(20)));
+  EXPECT_EQ(
+      "/**strange long long\n"
+      " * line */",
+      format("/**strange long long line */", getGoogleJSStyleWithColumns(20)));
+  // Break the first line of a single line jsdoc comment if it just exceeds the
+  // column limit.
+  EXPECT_EQ("/**\n"
+            " * jsdoc line 123\n"
+            " */",
+            format("/** jsdoc line 123 */", getGoogleJSStyleWithColumns(20)));
+  // Break also if the leading indent of the first line is more than 1 column.
+  EXPECT_EQ("/**\n"
+            " * jsdoc line 123\n"
+            " */",
+            format("/**  jsdoc line 123 */", getGoogleJSStyleWithColumns(20)));
+  // Break also if the leading indent of the first line is more than 1 column.
+  EXPECT_EQ("/**\n"
+            " * jsdoc line 123\n"
+            " */",
+            format("/**   jsdoc line 123 */", getGoogleJSStyleWithColumns(20)));
+  // Break after the content of the last line.
+  EXPECT_EQ("/**\n"
+            " * line 1\n"
+            " * line 2\n"
+            " */",
+            format("/**\n"
+                   " * line 1\n"
+                   " * line 2 */",
+                   getGoogleJSStyleWithColumns(20)));
+  // Break both the content and after the content of the last line.
+  EXPECT_EQ("/**\n"
+            " * line 1\n"
+            " * line long long\n"
+            " * long\n"
+            " */",
+            format("/**\n"
+                   " * line 1\n"
+                   " * line long long long */",
+                   getGoogleJSStyleWithColumns(20)));
+
+  // The comment block gets indented.
+  EXPECT_EQ("function f() {\n"
+            "  /**\n"
+            "   * comment about\n"
+            "   * x\n"
+            "   */\n"
+            "  var x = 1;\n"
+            "}",
+            format("function f() {\n"
+                   "/** comment about x */\n"
+                   "var x = 1;\n"
+                   "}",
+                   getGoogleJSStyleWithColumns(20)));
+}
+
 TEST_F(FormatTestJS, UnderstandsJavaScriptOperators) {
   verifyFormat("a == = b;");
   verifyFormat("a != = b;");

Modified: cfe/trunk/unittests/Format/FormatTestJava.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTestJava.cpp?rev=308684&r1=308683&r2=308684&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTestJava.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTestJava.cpp Thu Jul 20 15:29:39 2017
@@ -525,6 +525,15 @@ TEST_F(FormatTestJava, AlignsBlockCommen
                    "  void f() {}"));
 }
 
+TEST_F(FormatTestJava, KeepsDelimitersOnOwnLineInJavaDocComments) {
+  EXPECT_EQ("/**\n"
+            " * javadoc line 1\n"
+            " * javadoc line 2\n"
+            " */",
+            format("/** javadoc line 1\n"
+                   " * javadoc line 2 */"));
+}
+
 TEST_F(FormatTestJava, RetainsLogicalShifts) {
     verifyFormat("void f() {\n"
                  "  int a = 1;\n"