[clang] 5e5efd8 - [clang-format] Fix SeparateDefinitionBlocks issues

via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 24 06:24:12 PST 2022


Author: ksyx
Date: 2022-01-24T14:23:20Z
New Revision: 5e5efd8a91f2e340e79a73bedbc6ab66ad4a4281

URL: https://github.com/llvm/llvm-project/commit/5e5efd8a91f2e340e79a73bedbc6ab66ad4a4281
DIFF: https://github.com/llvm/llvm-project/commit/5e5efd8a91f2e340e79a73bedbc6ab66ad4a4281.diff

LOG: [clang-format] Fix SeparateDefinitionBlocks issues

- Fixes https://github.com/llvm/llvm-project/issues/53227 that wrongly
  indents multiline comments
- Fixes wrong detection of single-line opening braces when used along
  with those only opening scopes, causing crashes due to duplicated
  replacements on the same token:
    void foo()
    {
      {
        int x;
      }
    }
- Fixes wrong recognition of first line of definition when the line
  starts with block comment, causing crashes due to duplicated
  replacements on the same token for this leads toward skipping the line
  starting with inline block comment:
    /*
      Some descriptions about function
    */
    /*inline*/ void bar() {
    }
- Fixes wrong recognition of enum when used as a type name rather than
  starting definition block, causing crashes due to duplicated
  replacements on the same token since both actions for enum and for
  definition blocks were taken place:
    void foobar(const enum EnumType e) {
    }
- Change to use function keyword for JavaScript instead of comparing
  strings
- Resolves formatting conflict with options EmptyLineAfterAccessModifier
  and EmptyLineBeforeAccessModifier (prompts with --dry-run (-n) or
  --output-replacement-xml but no observable change)
- Recognize long (len>=5) uppercased name taking a single line as return
  type and fix the problem of adding newline below it, with adding new
  token type FunctionLikeOrFreestandingMacro and marking tokens in
  UnwrappedLineParser:
    void
    afunc(int x) {
      return;
    }
    TYPENAME
    func(int x, int y) {
      // ...
    }
- Remove redundant and repeated initialization
- Do no change to newlines before EOF

Reviewed By: MyDeveloperDay, curdeius, HazardyKnusperkeks
Differential Revision: https://reviews.llvm.org/D117520

Added: 
    

Modified: 
    clang/lib/Format/DefinitionBlockSeparator.cpp
    clang/lib/Format/DefinitionBlockSeparator.h
    clang/lib/Format/FormatToken.h
    clang/lib/Format/TokenAnnotator.cpp
    clang/lib/Format/UnwrappedLineParser.cpp
    clang/unittests/Format/DefinitionBlockSeparatorTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Format/DefinitionBlockSeparator.cpp b/clang/lib/Format/DefinitionBlockSeparator.cpp
index d09cd0bd33fbe..827564357f788 100644
--- a/clang/lib/Format/DefinitionBlockSeparator.cpp
+++ b/clang/lib/Format/DefinitionBlockSeparator.cpp
@@ -25,22 +25,27 @@ std::pair<tooling::Replacements, unsigned> DefinitionBlockSeparator::analyze(
   assert(Style.SeparateDefinitionBlocks != FormatStyle::SDS_Leave);
   AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
   tooling::Replacements Result;
-  separateBlocks(AnnotatedLines, Result);
+  separateBlocks(AnnotatedLines, Result, Tokens);
   return {Result, 0};
 }
 
 void DefinitionBlockSeparator::separateBlocks(
-    SmallVectorImpl<AnnotatedLine *> &Lines, tooling::Replacements &Result) {
+    SmallVectorImpl<AnnotatedLine *> &Lines, tooling::Replacements &Result,
+    FormatTokenLexer &Tokens) {
   const bool IsNeverStyle =
       Style.SeparateDefinitionBlocks == FormatStyle::SDS_Never;
-  auto LikelyDefinition = [this](const AnnotatedLine *Line) {
+  const AdditionalKeywords &ExtraKeywords = Tokens.getKeywords();
+  auto LikelyDefinition = [this, ExtraKeywords](const AnnotatedLine *Line,
+                                                bool ExcludeEnum = false) {
     if ((Line->MightBeFunctionDecl && Line->mightBeFunctionDefinition()) ||
         Line->startsWithNamespace())
       return true;
     FormatToken *CurrentToken = Line->First;
     while (CurrentToken) {
-      if (CurrentToken->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_enum) ||
-          (Style.isJavaScript() && CurrentToken->TokenText == "function"))
+      if (CurrentToken->isOneOf(tok::kw_class, tok::kw_struct) ||
+          (Style.isJavaScript() && CurrentToken->is(ExtraKeywords.kw_function)))
+        return true;
+      if (!ExcludeEnum && CurrentToken->is(tok::kw_enum))
         return true;
       CurrentToken = CurrentToken->Next;
     }
@@ -63,18 +68,25 @@ void DefinitionBlockSeparator::separateBlocks(
     AnnotatedLine *TargetLine;
     auto OpeningLineIndex = CurrentLine->MatchingOpeningBlockLineIndex;
     AnnotatedLine *OpeningLine = nullptr;
+    const auto IsAccessSpecifierToken = [](const FormatToken *Token) {
+      return Token->isAccessSpecifier() || Token->isObjCAccessSpecifier();
+    };
     const auto InsertReplacement = [&](const int NewlineToInsert) {
       assert(TargetLine);
       assert(TargetToken);
 
       // Do not handle EOF newlines.
-      if (TargetToken->is(tok::eof) && NewlineToInsert > 0)
+      if (TargetToken->is(tok::eof))
+        return;
+      if (IsAccessSpecifierToken(TargetToken) ||
+          (OpeningLineIndex > 0 &&
+           IsAccessSpecifierToken(Lines[OpeningLineIndex - 1]->First)))
         return;
       if (!TargetLine->Affected)
         return;
       Whitespaces.replaceWhitespace(*TargetToken, NewlineToInsert,
-                                    TargetToken->SpacesRequiredBefore - 1,
-                                    TargetToken->StartsColumn);
+                                    TargetToken->OriginalColumn,
+                                    TargetToken->OriginalColumn);
     };
     const auto IsPPConditional = [&](const size_t LineIndex) {
       const auto &Line = Lines[LineIndex];
@@ -89,26 +101,57 @@ void DefinitionBlockSeparator::separateBlocks(
              Lines[OpeningLineIndex - 1]->Last->opensScope() ||
              IsPPConditional(OpeningLineIndex - 1);
     };
-    const auto HasEnumOnLine = [CurrentLine]() {
+    const auto HasEnumOnLine = [&]() {
       FormatToken *CurrentToken = CurrentLine->First;
+      bool FoundEnumKeyword = false;
       while (CurrentToken) {
         if (CurrentToken->is(tok::kw_enum))
+          FoundEnumKeyword = true;
+        else if (FoundEnumKeyword && CurrentToken->is(tok::l_brace))
           return true;
         CurrentToken = CurrentToken->Next;
       }
-      return false;
+      return FoundEnumKeyword && I + 1 < Lines.size() &&
+             Lines[I + 1]->First->is(tok::l_brace);
     };
 
     bool IsDefBlock = false;
     const auto MayPrecedeDefinition = [&](const int Direction = -1) {
+      assert(Direction >= -1);
+      assert(Direction <= 1);
       const size_t OperateIndex = OpeningLineIndex + Direction;
       assert(OperateIndex < Lines.size());
       const auto &OperateLine = Lines[OperateIndex];
-      return (Style.isCSharp() && OperateLine->First->is(TT_AttributeSquare)) ||
-             OperateLine->First->is(tok::comment);
+      if (LikelyDefinition(OperateLine))
+        return false;
+
+      if (OperateLine->First->is(tok::comment))
+        return true;
+
+      // A single line identifier that is not in the last line.
+      if (OperateLine->First->is(tok::identifier) &&
+          OperateLine->First == OperateLine->Last &&
+          OperateIndex + 1 < Lines.size()) {
+        // UnwrappedLineParser's recognition of free-standing macro like
+        // Q_OBJECT may also recognize some uppercased type names that may be
+        // used as return type as that kind of macros, which is a bit hard to
+        // distinguish one from another purely from token patterns. Here, we
+        // try not to add new lines below those identifiers.
+        AnnotatedLine *NextLine = Lines[OperateIndex + 1];
+        if (NextLine->MightBeFunctionDecl &&
+            NextLine->mightBeFunctionDefinition() &&
+            NextLine->First->NewlinesBefore == 1 &&
+            OperateLine->First->is(TT_FunctionLikeOrFreestandingMacro))
+          return true;
+      }
+
+      if ((Style.isCSharp() && OperateLine->First->is(TT_AttributeSquare)))
+        return true;
+      return false;
     };
 
-    if (HasEnumOnLine()) {
+    if (HasEnumOnLine() &&
+        !LikelyDefinition(CurrentLine, /*ExcludeEnum=*/true)) {
       // We have no scope opening/closing information for enum.
       IsDefBlock = true;
       OpeningLineIndex = I;
@@ -132,8 +175,13 @@ void DefinitionBlockSeparator::separateBlocks(
     } else if (CurrentLine->First->closesScope()) {
       if (OpeningLineIndex > Lines.size())
         continue;
-      // Handling the case that opening bracket has its own line.
-      OpeningLineIndex -= Lines[OpeningLineIndex]->First->is(tok::l_brace);
+      // Handling the case that opening brace has its own line, with checking
+      // whether the last line already had an opening brace to guard against
+      // misrecognition.
+      if (OpeningLineIndex > 0 &&
+          Lines[OpeningLineIndex]->First->is(tok::l_brace) &&
+          Lines[OpeningLineIndex - 1]->Last->isNot(tok::l_brace))
+        --OpeningLineIndex;
       OpeningLine = Lines[OpeningLineIndex];
       // Closing a function definition.
       if (LikelyDefinition(OpeningLine)) {
@@ -168,15 +216,13 @@ void DefinitionBlockSeparator::separateBlocks(
           ++OpeningLineIndex;
         TargetLine = Lines[OpeningLineIndex];
         if (!LikelyDefinition(TargetLine)) {
+          OpeningLineIndex = I + 1;
           TargetLine = Lines[I + 1];
           TargetToken = TargetLine->First;
           InsertReplacement(NewlineCount);
         }
-      } else if (IsNeverStyle) {
-        TargetLine = Lines[I + 1];
-        TargetToken = TargetLine->First;
-        InsertReplacement(OpeningLineIndex != 0);
-      }
+      } else if (IsNeverStyle)
+        InsertReplacement(/*NewlineToInsert=*/1);
     }
   }
   for (const auto &R : Whitespaces.generateReplacements())

diff  --git a/clang/lib/Format/DefinitionBlockSeparator.h b/clang/lib/Format/DefinitionBlockSeparator.h
index 13b90c5ab083d..31c0f34d6e198 100644
--- a/clang/lib/Format/DefinitionBlockSeparator.h
+++ b/clang/lib/Format/DefinitionBlockSeparator.h
@@ -33,7 +33,7 @@ class DefinitionBlockSeparator : public TokenAnalyzer {
 
 private:
   void separateBlocks(SmallVectorImpl<AnnotatedLine *> &Lines,
-                      tooling::Replacements &Result);
+                      tooling::Replacements &Result, FormatTokenLexer &Tokens);
 };
 } // namespace format
 } // namespace clang

diff  --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index b087f9f120411..7cc090cb77def 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -51,6 +51,7 @@ namespace format {
   TYPE(FunctionAnnotationRParen)                                               \
   TYPE(FunctionDeclarationName)                                                \
   TYPE(FunctionLBrace)                                                         \
+  TYPE(FunctionLikeOrFreestandingMacro)                                        \
   TYPE(FunctionTypeLParen)                                                     \
   TYPE(IfMacro)                                                                \
   TYPE(ImplicitStringLiteral)                                                  \

diff  --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index cc8b48387fc9e..b9535f7965598 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1423,7 +1423,7 @@ class AnnotatingParser {
             TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator,
             TT_RegexLiteral, TT_TemplateString, TT_ObjCStringLiteral,
             TT_UntouchableMacroFunc, TT_ConstraintJunctions,
-            TT_StatementAttributeLikeMacro))
+            TT_StatementAttributeLikeMacro, TT_FunctionLikeOrFreestandingMacro))
       CurrentToken->setType(TT_Unknown);
     CurrentToken->Role.reset();
     CurrentToken->MatchingParen = nullptr;

diff  --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 67b7b3937b07e..96d227b7fe763 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -1682,6 +1682,8 @@ void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
 
       // See if the following token should start a new unwrapped line.
       StringRef Text = FormatTok->TokenText;
+
+      FormatToken *PreviousToken = FormatTok;
       nextToken();
 
       // JS doesn't have macros, and within classes colons indicate fields, not
@@ -1710,6 +1712,7 @@ void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
 
         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
+          PreviousToken->setType(TT_FunctionLikeOrFreestandingMacro);
           addUnwrappedLine();
           return;
         }

diff  --git a/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp b/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp
index 69c87cb4b51fd..4cbae0f55b036 100644
--- a/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp
+++ b/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp
@@ -131,6 +131,73 @@ TEST_F(DefinitionBlockSeparatorTest, Basic) {
                "\n"
                "enum Bar { FOOBAR, BARFOO };\n",
                Style);
+
+  FormatStyle BreakAfterReturnTypeStyle = Style;
+  BreakAfterReturnTypeStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_All;
+  // Test uppercased long typename
+  verifyFormat("class Foo {\n"
+               "  void\n"
+               "  Bar(int t, int p) {\n"
+               "    int r = t + p;\n"
+               "    return r;\n"
+               "  }\n"
+               "\n"
+               "  HRESULT\n"
+               "  Foobar(int t, int p) {\n"
+               "    int r = t * p;\n"
+               "    return r;\n"
+               "  }\n"
+               "}\n",
+               BreakAfterReturnTypeStyle);
+}
+
+TEST_F(DefinitionBlockSeparatorTest, FormatConflict) {
+  FormatStyle Style = getLLVMStyle();
+  Style.SeparateDefinitionBlocks = FormatStyle::SDS_Always;
+  llvm::StringRef Code = "class Test {\n"
+                         "public:\n"
+                         "  static void foo() {\n"
+                         "    int t;\n"
+                         "    return 1;\n"
+                         "  }\n"
+                         "};";
+  std::vector<tooling::Range> Ranges = {1, tooling::Range(0, Code.size())};
+  EXPECT_EQ(reformat(Style, Code, Ranges, "<stdin>").size(), 0u);
+}
+
+TEST_F(DefinitionBlockSeparatorTest, CommentBlock) {
+  FormatStyle Style = getLLVMStyle();
+  Style.SeparateDefinitionBlocks = FormatStyle::SDS_Always;
+  std::string Prefix = "enum Foo { FOO, BAR };\n"
+                       "\n"
+                       "/*\n"
+                       "test1\n"
+                       "test2\n"
+                       "*/\n"
+                       "int foo(int i, int j) {\n"
+                       "  int r = i + j;\n"
+                       "  return r;\n"
+                       "}\n";
+  std::string Suffix = "enum Bar { FOOBAR, BARFOO };\n"
+                       "\n"
+                       "/* Comment block in one line*/\n"
+                       "int bar3(int j, int k) {\n"
+                       "  // A comment\n"
+                       "  int r = j % k;\n"
+                       "  return r;\n"
+                       "}\n";
+  std::string CommentedCode = "/*\n"
+                              "int bar2(int j, int k) {\n"
+                              "  int r = j / k;\n"
+                              "  return r;\n"
+                              "}\n"
+                              "*/\n";
+  verifyFormat(removeEmptyLines(Prefix) + "\n" + CommentedCode + "\n" +
+                   removeEmptyLines(Suffix),
+               Style, Prefix + "\n" + CommentedCode + "\n" + Suffix);
+  verifyFormat(removeEmptyLines(Prefix) + "\n" + CommentedCode +
+                   removeEmptyLines(Suffix),
+               Style, Prefix + "\n" + CommentedCode + Suffix);
 }
 
 TEST_F(DefinitionBlockSeparatorTest, UntouchBlockStartStyle) {
@@ -175,13 +242,15 @@ TEST_F(DefinitionBlockSeparatorTest, UntouchBlockStartStyle) {
   FormatStyle NeverStyle = getLLVMStyle();
   NeverStyle.SeparateDefinitionBlocks = FormatStyle::SDS_Never;
 
-  auto TestKit = MakeUntouchTest("#ifdef FOO\n\n", "\n#elifndef BAR\n\n",
-                                 "\n#endif\n\n", false);
+  auto TestKit = MakeUntouchTest("/* FOOBAR */\n"
+                                 "#ifdef FOO\n\n",
+                                 "\n#elifndef BAR\n\n", "\n#endif\n\n", false);
   verifyFormat(TestKit.first, AlwaysStyle, TestKit.second);
   verifyFormat(TestKit.second, NeverStyle, removeEmptyLines(TestKit.second));
 
-  TestKit =
-      MakeUntouchTest("#ifdef FOO\n", "#elifndef BAR\n", "#endif\n", false);
+  TestKit = MakeUntouchTest("/* FOOBAR */\n"
+                            "#ifdef FOO\n",
+                            "#elifndef BAR\n", "#endif\n", false);
   verifyFormat(TestKit.first, AlwaysStyle, TestKit.second);
   verifyFormat(TestKit.second, NeverStyle, removeEmptyLines(TestKit.second));
 
@@ -213,7 +282,7 @@ TEST_F(DefinitionBlockSeparatorTest, Always) {
                       "test1\n"
                       "test2\n"
                       "*/\n"
-                      "int foo(int i, int j) {\n"
+                      "/*const*/ int foo(int i, int j) {\n"
                       "  int r = i + j;\n"
                       "  return r;\n"
                       "}\n"
@@ -225,8 +294,10 @@ TEST_F(DefinitionBlockSeparatorTest, Always) {
                       "// Comment line 2\n"
                       "// Comment line 3\n"
                       "int bar(int j, int k) {\n"
-                      "  int r = j * k;\n"
-                      "  return r;\n"
+                      "  {\n"
+                      "    int r = j * k;\n"
+                      "    return r;\n"
+                      "  }\n"
                       "}\n"
                       "\n"
                       "int bar2(int j, int k) {\n"
@@ -237,7 +308,7 @@ TEST_F(DefinitionBlockSeparatorTest, Always) {
                       "/* Comment block in one line*/\n"
                       "enum Bar { FOOBAR, BARFOO };\n"
                       "\n"
-                      "int bar3(int j, int k) {\n"
+                      "int bar3(int j, int k, const enum Bar b) {\n"
                       "  // A comment\n"
                       "  int r = j % k;\n"
                       "  return r;\n"
@@ -264,7 +335,7 @@ TEST_F(DefinitionBlockSeparatorTest, Never) {
                         "test1\n"
                         "test2\n"
                         "*/\n"
-                        "int foo(int i, int j) {\n"
+                        "/*const*/ int foo(int i, int j) {\n"
                         "  int r = i + j;\n"
                         "  return r;\n"
                         "}\n"
@@ -276,8 +347,10 @@ TEST_F(DefinitionBlockSeparatorTest, Never) {
                         "// Comment line 2\n"
                         "// Comment line 3\n"
                         "int bar(int j, int k) {\n"
-                        "  int r = j * k;\n"
-                        "  return r;\n"
+                        "  {\n"
+                        "    int r = j * k;\n"
+                        "    return r;\n"
+                        "  }\n"
                         "}\n"
                         "\n"
                         "int bar2(int j, int k) {\n"
@@ -288,7 +361,7 @@ TEST_F(DefinitionBlockSeparatorTest, Never) {
                         "/* Comment block in one line*/\n"
                         "enum Bar { FOOBAR, BARFOO };\n"
                         "\n"
-                        "int bar3(int j, int k) {\n"
+                        "int bar3(int j, int k, const enum Bar b) {\n"
                         "  // A comment\n"
                         "  int r = j % k;\n"
                         "  return r;\n"
@@ -316,7 +389,7 @@ TEST_F(DefinitionBlockSeparatorTest, OpeningBracketOwnsLine) {
                "test1\n"
                "test2\n"
                "*/\n"
-               "int foo(int i, int j)\n"
+               "/*const*/ int foo(int i, int j)\n"
                "{\n"
                "  int r = i + j;\n"
                "  return r;\n"
@@ -330,8 +403,10 @@ TEST_F(DefinitionBlockSeparatorTest, OpeningBracketOwnsLine) {
                "// Comment line 3\n"
                "int bar(int j, int k)\n"
                "{\n"
-               "  int r = j * k;\n"
-               "  return r;\n"
+               "  {\n"
+               "    int r = j * k;\n"
+               "    return r;\n"
+               "  }\n"
                "}\n"
                "\n"
                "int bar2(int j, int k)\n"
@@ -346,7 +421,7 @@ TEST_F(DefinitionBlockSeparatorTest, OpeningBracketOwnsLine) {
                "  BARFOO\n"
                "};\n"
                "\n"
-               "int bar3(int j, int k)\n"
+               "int bar3(int j, int k, const enum Bar b)\n"
                "{\n"
                "  // A comment\n"
                "  int r = j % k;\n"
@@ -370,7 +445,7 @@ TEST_F(DefinitionBlockSeparatorTest, Leave) {
                         "test1\n"
                         "test2\n"
                         "*/\n"
-                        "int foo(int i, int j) {\n"
+                        "/*const*/ int foo(int i, int j) {\n"
                         "  int r = i + j;\n"
                         "  return r;\n"
                         "}\n"
@@ -382,8 +457,10 @@ TEST_F(DefinitionBlockSeparatorTest, Leave) {
                         "// Comment line 2\n"
                         "// Comment line 3\n"
                         "int bar(int j, int k) {\n"
-                        "  int r = j * k;\n"
-                        "  return r;\n"
+                        "  {\n"
+                        "    int r = j * k;\n"
+                        "    return r;\n"
+                        "  }\n"
                         "}\n"
                         "\n"
                         "int bar2(int j, int k) {\n"
@@ -393,7 +470,7 @@ TEST_F(DefinitionBlockSeparatorTest, Leave) {
                         "\n"
                         "// Comment for inline enum\n"
                         "enum Bar { FOOBAR, BARFOO };\n"
-                        "int bar3(int j, int k) {\n"
+                        "int bar3(int j, int k, const enum Bar b) {\n"
                         "  // A comment\n"
                         "  int r = j % k;\n"
                         "  return r;\n"


        


More information about the cfe-commits mailing list