r317886 - [clang-format] Support python-style comments in text protos

Krasimir Georgiev via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 10 04:50:09 PST 2017


Author: krasimir
Date: Fri Nov 10 04:50:09 2017
New Revision: 317886

URL: http://llvm.org/viewvc/llvm-project?rev=317886&view=rev
Log:
[clang-format] Support python-style comments in text protos

Summary: This patch adds support for python-style (`#`-prefixed) line comments in text protos, in addition to the existing `//` line comments.

Reviewers: djasper

Reviewed By: djasper

Subscribers: bkramer, cfe-commits, klimek

Differential Revision: https://reviews.llvm.org/D39806

Modified:
    cfe/trunk/lib/Format/BreakableToken.cpp
    cfe/trunk/lib/Format/FormatTokenLexer.cpp
    cfe/trunk/lib/Format/FormatTokenLexer.h
    cfe/trunk/lib/Format/UnwrappedLineParser.cpp
    cfe/trunk/unittests/Format/FormatTestComments.cpp

Modified: cfe/trunk/lib/Format/BreakableToken.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=317886&r1=317885&r2=317886&view=diff
==============================================================================
--- cfe/trunk/lib/Format/BreakableToken.cpp (original)
+++ cfe/trunk/lib/Format/BreakableToken.cpp Fri Nov 10 04:50:09 2017
@@ -40,9 +40,15 @@ static bool IsBlank(char C) {
   }
 }
 
-static StringRef getLineCommentIndentPrefix(StringRef Comment) {
-  static const char *const KnownPrefixes[] = {"///<", "//!<", "///", "//",
-                                              "//!"};
+static StringRef getLineCommentIndentPrefix(StringRef Comment,
+                                            const FormatStyle &Style) {
+  static const char *const KnownCStylePrefixes[] = {"///<", "//!<", "///", "//",
+                                                    "//!"};
+  static const char *const KnownTextProtoPrefixes[] = {"//", "#"};
+  ArrayRef<const char *> KnownPrefixes(KnownCStylePrefixes);
+  if (Style.Language == FormatStyle::LK_TextProto)
+    KnownPrefixes = KnownTextProtoPrefixes;
+
   StringRef LongestPrefix;
   for (StringRef KnownPrefix : KnownPrefixes) {
     if (Comment.startswith(KnownPrefix)) {
@@ -732,7 +738,8 @@ BreakableLineCommentSection::BreakableLi
        CurrentTok = CurrentTok->Next) {
     LastLineTok = LineTok;
     StringRef TokenText(CurrentTok->TokenText);
-    assert(TokenText.startswith("//"));
+    assert((TokenText.startswith("//") || TokenText.startswith("#")) &&
+           "unsupported line comment prefix, '//' and '#' are supported");
     size_t FirstLineIndex = Lines.size();
     TokenText.split(Lines, "\n");
     Content.resize(Lines.size());
@@ -745,8 +752,9 @@ BreakableLineCommentSection::BreakableLi
       // We need to trim the blanks in case this is not the first line in a
       // multiline comment. Then the indent is included in Lines[i].
       StringRef IndentPrefix =
-          getLineCommentIndentPrefix(Lines[i].ltrim(Blanks));
-      assert(IndentPrefix.startswith("//"));
+          getLineCommentIndentPrefix(Lines[i].ltrim(Blanks), Style);
+      assert((TokenText.startswith("//") || TokenText.startswith("#")) &&
+             "unsupported line comment prefix, '//' and '#' are supported");
       OriginalPrefix[i] = Prefix[i] = IndentPrefix;
       if (Lines[i].size() > Prefix[i].size() &&
           isAlphanumeric(Lines[i][Prefix[i].size()])) {
@@ -760,6 +768,9 @@ BreakableLineCommentSection::BreakableLi
           Prefix[i] = "///< ";
         else if (Prefix[i] == "//!<")
           Prefix[i] = "//!< ";
+        else if (Prefix[i] == "#" &&
+                 Style.Language == FormatStyle::LK_TextProto)
+          Prefix[i] = "# ";
       }
 
       Tokens[i] = LineTok;

Modified: cfe/trunk/lib/Format/FormatTokenLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatTokenLexer.cpp?rev=317886&r1=317885&r2=317886&view=diff
==============================================================================
--- cfe/trunk/lib/Format/FormatTokenLexer.cpp (original)
+++ cfe/trunk/lib/Format/FormatTokenLexer.cpp Fri Nov 10 04:50:09 2017
@@ -50,6 +50,8 @@ ArrayRef<FormatToken *> FormatTokenLexer
       tryParseJSRegexLiteral();
       handleTemplateStrings();
     }
+    if (Style.Language == FormatStyle::LK_TextProto)
+      tryParsePythonComment();
     tryMergePreviousTokens();
     if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline)
       FirstInLineIndex = Tokens.size() - 1;
@@ -330,6 +332,27 @@ void FormatTokenLexer::handleTemplateStr
   resetLexer(SourceMgr.getFileOffset(loc));
 }
 
+void FormatTokenLexer::tryParsePythonComment() {
+  FormatToken *HashToken = Tokens.back();
+  if (HashToken->isNot(tok::hash))
+    return;
+  // Turn the remainder of this line into a comment.
+  const char *CommentBegin =
+      Lex->getBufferLocation() - HashToken->TokenText.size(); // at "#"
+  size_t From = CommentBegin - Lex->getBuffer().begin();
+  size_t To = Lex->getBuffer().find_first_of('\n', From);
+  if (To == StringRef::npos)
+    To = Lex->getBuffer().size();
+  size_t Len = To - From;
+  HashToken->Type = TT_LineComment;
+  HashToken->Tok.setKind(tok::comment);
+  HashToken->TokenText = Lex->getBuffer().substr(From, Len);
+  SourceLocation Loc = To < Lex->getBuffer().size()
+                           ? Lex->getSourceLocation(CommentBegin + Len)
+                           : SourceMgr.getLocForEndOfFile(ID);
+  resetLexer(SourceMgr.getFileOffset(Loc));
+}
+
 bool FormatTokenLexer::tryMerge_TMacro() {
   if (Tokens.size() < 4)
     return false;

Modified: cfe/trunk/lib/Format/FormatTokenLexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatTokenLexer.h?rev=317886&r1=317885&r2=317886&view=diff
==============================================================================
--- cfe/trunk/lib/Format/FormatTokenLexer.h (original)
+++ cfe/trunk/lib/Format/FormatTokenLexer.h Fri Nov 10 04:50:09 2017
@@ -73,6 +73,8 @@ private:
   // nested template parts by balancing curly braces.
   void handleTemplateStrings();
 
+  void tryParsePythonComment();
+
   bool tryMerge_TMacro();
 
   bool tryMergeConflictMarkers();

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=317886&r1=317885&r2=317886&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Fri Nov 10 04:50:09 2017
@@ -56,7 +56,7 @@ private:
 };
 
 static bool isLineComment(const FormatToken &FormatTok) {
-  return FormatTok.is(tok::comment) && FormatTok.TokenText.startswith("//");
+  return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
 }
 
 // Checks if \p FormatTok is a line comment that continues the line comment

Modified: cfe/trunk/unittests/Format/FormatTestComments.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTestComments.cpp?rev=317886&r1=317885&r2=317886&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTestComments.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTestComments.cpp Fri Nov 10 04:50:09 2017
@@ -62,6 +62,12 @@ protected:
     return Style;
   }
 
+  FormatStyle getTextProtoStyleWithColumns(unsigned ColumnLimit) {
+    FormatStyle Style = getGoogleStyle(FormatStyle::FormatStyle::LK_TextProto);
+    Style.ColumnLimit = ColumnLimit;
+    return Style;
+  }
+
   void verifyFormat(llvm::StringRef Code,
                     const FormatStyle &Style = getLLVMStyle()) {
     EXPECT_EQ(Code.str(), format(test::messUp(Code), Style));
@@ -2872,6 +2878,85 @@ TEST_F(FormatTestComments, NonTrailingBl
                    "    A = B;",
                    getLLVMStyleWithColumns(40)));
 }
+
+TEST_F(FormatTestComments, PythonStyleComments) {
+  // Keeps a space after '#'.
+  EXPECT_EQ("# comment\n"
+            "key: value",
+            format("#comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  EXPECT_EQ("# comment\n"
+            "key: value",
+            format("# comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  // Breaks long comment.
+  EXPECT_EQ("# comment comment\n"
+            "# comment\n"
+            "key: value",
+            format("# comment comment comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  // Indents comments.
+  EXPECT_EQ("data {\n"
+            "  # comment comment\n"
+            "  # comment\n"
+            "  key: value\n"
+            "}",
+            format("data {\n"
+                   "# comment comment comment\n"
+                   "key: value}",
+                   getTextProtoStyleWithColumns(20)));
+  EXPECT_EQ("data {\n"
+            "  # comment comment\n"
+            "  # comment\n"
+            "  key: value\n"
+            "}",
+            format("data {# comment comment comment\n"
+                   "key: value}",
+                   getTextProtoStyleWithColumns(20)));
+  // Reflows long comments.
+  EXPECT_EQ("# comment comment\n"
+            "# comment comment\n"
+            "key: value",
+            format("# comment comment comment\n"
+                   "# comment\n"
+                   "key:value",
+                   getTextProtoStyleWithColumns(20)));
+  // Breaks trailing comments.
+  EXPECT_EQ("k: val  # comment\n"
+            "        # comment\n"
+            "a: 1",
+            format("k:val#comment comment\n"
+                   "a:1",
+                   getTextProtoStyleWithColumns(20)));
+  EXPECT_EQ("id {\n"
+            "  k: val  # comment\n"
+            "          # comment\n"
+            "  # line line\n"
+            "  a: 1\n"
+            "}",
+            format("id {k:val#comment comment\n"
+                   "# line line\n"
+                   "a:1}",
+                   getTextProtoStyleWithColumns(20)));
+  // Aligns trailing comments.
+  EXPECT_EQ("k: val  # commen1\n"
+            "        # commen2\n"
+            "        # commen3\n"
+            "# commen4\n"
+            "a: 1  # commen5\n"
+            "      # commen6\n"
+            "      # commen7",
+            format("k:val#commen1 commen2\n"
+                   " # commen3\n"
+                   "# commen4\n"
+                   "a:1#commen5 commen6\n"
+                   " #commen7",
+                   getTextProtoStyleWithColumns(20)));
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang




More information about the cfe-commits mailing list