[clang] [clang-format] Separate License text and include blocks (PR #77918)

via cfe-commits cfe-commits at lists.llvm.org
Fri Jan 19 06:22:45 PST 2024


https://github.com/seranu updated https://github.com/llvm/llvm-project/pull/77918

>From af4bd7c1a75b1bb17cf4facde2f3caf86f01511e Mon Sep 17 00:00:00 2001
From: Serban Ungureanu <serban.ungureanu at randstaddigital.com>
Date: Tue, 16 Jan 2024 22:19:52 +0200
Subject: [PATCH] [clang-format] Add formatting options
 EmptyLinesAfterTopLevelComment and EmptyLinesAfterIncludes

---
 clang/docs/ClangFormatStyleOptions.rst        |  52 ++++++++
 clang/include/clang/Format/Format.h           |  48 ++++++++
 clang/lib/Format/CMakeLists.txt               |   1 +
 clang/lib/Format/Format.cpp                   |  14 +++
 clang/lib/Format/TokenAnnotator.h             |   8 ++
 clang/lib/Format/TopLevelCommentSeparator.cpp |  70 +++++++++++
 clang/lib/Format/TopLevelCommentSeparator.h   |  42 +++++++
 clang/lib/Format/UnwrappedLineFormatter.cpp   |   6 +
 clang/unittests/Format/CMakeLists.txt         |   1 +
 clang/unittests/Format/ConfigParseTest.cpp    |   4 +
 clang/unittests/Format/FormatTest.cpp         |  20 ++++
 .../Format/TopLevelCommentSeparatorTest.cpp   | 112 ++++++++++++++++++
 12 files changed, 378 insertions(+)
 create mode 100644 clang/lib/Format/TopLevelCommentSeparator.cpp
 create mode 100644 clang/lib/Format/TopLevelCommentSeparator.h
 create mode 100644 clang/unittests/Format/TopLevelCommentSeparatorTest.cpp

diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index 8bc13e45bf2f5f..50f20ea796a7ff 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -3220,6 +3220,58 @@ the configuration (without a prefix: ``Auto``).
 
 
 
+.. _EmptyLinesAfterIncludes:
+
+**EmptyLinesAfterIncludes** (``Unsigned``) :versionbadge:`clang-format 1` :ref:`¶ <EmptyLinesAfterIncludes>`
+  Number of empty lines after includes.
+  If set, determines the number of empty lines to insert/keep after a block
+  of includes. Limited by MaxEmptyLinesToKeep.
+  Example:
+  EmptyLinesAfterIncludes = 1
+
+  .. code-block:: c++
+
+     #include <string>
+     #include <map>
+
+     class Test {};
+
+  vs EmptyLinesAfterIncludes = 2
+
+  .. code-block:: c++
+
+     #include <string>
+     #include <map>
+
+
+     class Test {};
+
+.. _EmptyLinesAfterTopLevelComment:
+
+**EmptyLinesAfterTopLevelComment** (``Unsigned``) :versionbadge:`clang-format 1` :ref:`¶ <EmptyLinesAfterTopLevelComment>`
+  Number of empty lines after top level comment.
+  If set, determines the number of empty lines to insert/keep after the top
+  level comment. Limited by MaxEmptyLinesToKeep.
+  Example:
+  EmptyLinesAfterTopLevelComment = 1
+
+  .. code-block:: c++
+
+     /* LICENSE TEXT */
+
+     #include <string>
+     class Test {};
+
+  vs EmptyLinesAfterTopLevelComment = 2
+
+  .. code-block:: c++
+
+     /* License Text */
+
+
+     #include <string>
+     class Test {};
+
 .. _ExperimentalAutoDetectBinPacking:
 
 **ExperimentalAutoDetectBinPacking** (``Boolean``) :versionbadge:`clang-format 3.7` :ref:`¶ <ExperimentalAutoDetectBinPacking>`
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 6fd7947bd21791..5db9ccd8d6776a 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -2459,6 +2459,52 @@ struct FormatStyle {
   /// \version 12
   EmptyLineBeforeAccessModifierStyle EmptyLineBeforeAccessModifier;
 
+  /// \brief Number of empty lines after includes.
+  /// If set, determines the number of empty lines to insert/keep after a block
+  /// of includes. Limited by MaxEmptyLinesToKeep.
+  /// Example:
+  /// EmptyLinesAfterIncludes = 1
+  /// \code
+  ///    #include <string>
+  ///    #include <map>
+  ///
+  ///    class Test {};
+  ///
+  /// \endcode
+  /// vs EmptyLinesAfterIncludes = 2
+  /// \code
+  ///    #include <string>
+  ///    #include <map>
+  ///
+  ///
+  ///    class Test {};
+  /// \endcode
+  /// \version 1
+  std::optional<unsigned> EmptyLinesAfterIncludes;
+
+  /// \brief Number of empty lines after top level comment.
+  /// If set, determines the number of empty lines to insert/keep after the top
+  /// level comment. Limited by MaxEmptyLinesToKeep.
+  /// Example:
+  /// EmptyLinesAfterTopLevelComment = 1
+  /// \code
+  ///    /* LICENSE TEXT */
+  ///
+  ///    #include <string>
+  ///    class Test {};
+  ///
+  /// \endcode
+  /// vs EmptyLinesAfterTopLevelComment = 2
+  /// \code
+  ///    /* License Text */
+  ///
+  ///
+  ///    #include <string>
+  ///    class Test {};
+  /// \endcode
+  /// \version 1
+  std::optional<unsigned> EmptyLinesAfterTopLevelComment;
+
   /// If ``true``, clang-format detects whether function calls and
   /// definitions are formatted with one parameter per line.
   ///
@@ -4827,6 +4873,8 @@ struct FormatStyle {
            DerivePointerAlignment == R.DerivePointerAlignment &&
            DisableFormat == R.DisableFormat &&
            EmptyLineAfterAccessModifier == R.EmptyLineAfterAccessModifier &&
+           EmptyLinesAfterIncludes == R.EmptyLinesAfterIncludes &&
+           EmptyLinesAfterTopLevelComment == R.EmptyLinesAfterTopLevelComment &&
            EmptyLineBeforeAccessModifier == R.EmptyLineBeforeAccessModifier &&
            ExperimentalAutoDetectBinPacking ==
                R.ExperimentalAutoDetectBinPacking &&
diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt
index 84a3c136f650a8..64ad0dfb48f84a 100644
--- a/clang/lib/Format/CMakeLists.txt
+++ b/clang/lib/Format/CMakeLists.txt
@@ -18,6 +18,7 @@ add_clang_library(clangFormat
   SortJavaScriptImports.cpp
   TokenAnalyzer.cpp
   TokenAnnotator.cpp
+  TopLevelCommentSeparator.cpp
   UnwrappedLineFormatter.cpp
   UnwrappedLineParser.cpp
   UsingDeclarationsSorter.cpp
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 7c2f4dcf3d2308..a4aff282c8b25e 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -27,6 +27,7 @@
 #include "SortJavaScriptImports.h"
 #include "TokenAnalyzer.h"
 #include "TokenAnnotator.h"
+#include "TopLevelCommentSeparator.h"
 #include "UnwrappedLineFormatter.h"
 #include "UnwrappedLineParser.h"
 #include "UsingDeclarationsSorter.h"
@@ -995,6 +996,9 @@ template <> struct MappingTraits<FormatStyle> {
     IO.mapOptional("DisableFormat", Style.DisableFormat);
     IO.mapOptional("EmptyLineAfterAccessModifier",
                    Style.EmptyLineAfterAccessModifier);
+    IO.mapOptional("EmptyLinesAfterIncludes", Style.EmptyLinesAfterIncludes);
+    IO.mapOptional("EmptyLinesAfterTopLevelComment",
+                   Style.EmptyLinesAfterTopLevelComment);
     IO.mapOptional("EmptyLineBeforeAccessModifier",
                    Style.EmptyLineBeforeAccessModifier);
     IO.mapOptional("ExperimentalAutoDetectBinPacking",
@@ -1501,6 +1507,8 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
   LLVMStyle.DerivePointerAlignment = false;
   LLVMStyle.DisableFormat = false;
   LLVMStyle.EmptyLineAfterAccessModifier = FormatStyle::ELAAMS_Never;
+  LLVMStyle.EmptyLinesAfterIncludes = std::nullopt;
+  LLVMStyle.EmptyLinesAfterTopLevelComment = std::nullopt;
   LLVMStyle.EmptyLineBeforeAccessModifier = FormatStyle::ELBAMS_LogicalBlock;
   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
   LLVMStyle.FixNamespaceComments = true;
@@ -3713,6 +3721,12 @@ reformat(const FormatStyle &Style, StringRef Code,
     });
   }
 
+  if (Style.EmptyLinesAfterTopLevelComment.has_value()) {
+    Passes.emplace_back([&](const Environment &Env) {
+      return TopLevelCommentSeparator(Env, Expanded).process();
+    });
+  }
+
   if (Style.Language == FormatStyle::LK_ObjC &&
       !Style.ObjCPropertyAttributeOrder.empty()) {
     Passes.emplace_back([&](const Environment &Env) {
diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h
index 05a6daa87d8034..06486799ec4031 100644
--- a/clang/lib/Format/TokenAnnotator.h
+++ b/clang/lib/Format/TokenAnnotator.h
@@ -113,6 +113,14 @@ class AnnotatedLine {
     return First && First->is(tok::comment) && !First->getNextNonComment();
   }
 
+  /// \c true if this line is an \c #include preprocessor directive.
+  bool isInclude() const {
+    if (!First || !First->is(tok::hash))
+      return false;
+    const auto *Directive = First->getNextNonComment();
+    return Directive && Directive->is(tok::pp_include);
+  }
+
   /// \c true if this line starts with the given tokens in order, ignoring
   /// comments.
   template <typename... Ts> bool startsWith(Ts... Tokens) const {
diff --git a/clang/lib/Format/TopLevelCommentSeparator.cpp b/clang/lib/Format/TopLevelCommentSeparator.cpp
new file mode 100644
index 00000000000000..b55ddead48f5e5
--- /dev/null
+++ b/clang/lib/Format/TopLevelCommentSeparator.cpp
@@ -0,0 +1,70 @@
+//===--- TopLevelCommentSeparator.cpp ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements TopLevelCommentSeparator, a TokenAnalyzer that inserts
+/// new lines or removes empty lines after the top level comment (i.e. comment
+/// block at the top of the source file), usually license text or documentation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "TopLevelCommentSeparator.h"
+#define DEBUG_TYPE "top-level-comment-separator"
+
+namespace clang {
+namespace format {
+std::pair<tooling::Replacements, unsigned> TopLevelCommentSeparator::analyze(
+    TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+    FormatTokenLexer &Tokens) {
+  // The option is dereferenced unconditionally below, so it must be set.
+  assert(Style.EmptyLinesAfterTopLevelComment.has_value());
+  AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
+  tooling::Replacements Result;
+  separateTopLevelComment(AnnotatedLines, Result, Tokens);
+  return {Result, 0};
+}
+
+void TopLevelCommentSeparator::separateTopLevelComment(
+    SmallVectorImpl<AnnotatedLine *> &Lines, tooling::Replacements &Result,
+    FormatTokenLexer &Tokens) {
+  // One newline ends the comment; the rest are the requested empty lines.
+  const unsigned NewlineCount =
+      1 + std::min(Style.MaxEmptyLinesToKeep,
+                   *Style.EmptyLinesAfterTopLevelComment);
+  WhitespaceManager Whitespaces(
+      Env.getSourceManager(), Style,
+      Style.LineEnding > FormatStyle::LE_CRLF
+          ? WhitespaceManager::inputUsesCRLF(
+                Env.getSourceManager().getBufferData(Env.getFileID()),
+                Style.LineEnding == FormatStyle::LE_DeriveCRLF)
+          : Style.LineEnding == FormatStyle::LE_CRLF);
+
+  // Only a comment block starting on the very first line is the top-level
+  // comment; the first non-comment line ends the scan either way.
+  for (unsigned I = 0; I < Lines.size(); ++I) {
+    const auto &CurrentLine = Lines[I];
+    if (CurrentLine->isComment())
+      continue;
+    // Do not handle EOF newlines.
+    if (I > 0 && !CurrentLine->First->is(tok::eof) && CurrentLine->Affected) {
+      Whitespaces.replaceWhitespace(*CurrentLine->First, NewlineCount,
+                                    CurrentLine->First->OriginalColumn,
+                                    CurrentLine->First->OriginalColumn);
+    }
+    break;
+  }
+
+  for (const auto &R : Whitespaces.generateReplacements()) {
+    // Replacements::add returns an llvm::Error, which is truthy on failure;
+    // stop at the first replacement that cannot be added.
+    if (Result.add(R))
+      return;
+  }
+}
+} // namespace format
+} // namespace clang
diff --git a/clang/lib/Format/TopLevelCommentSeparator.h b/clang/lib/Format/TopLevelCommentSeparator.h
new file mode 100644
index 00000000000000..2901942d8cf7d0
--- /dev/null
+++ b/clang/lib/Format/TopLevelCommentSeparator.h
@@ -0,0 +1,42 @@
+//===--- TopLevelCommentSeparator.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares TopLevelCommentSeparator, a TokenAnalyzer that inserts
+/// new lines or removes empty lines after the top level comment (i.e. comment
+/// block at the top of the source file), usually license text or documentation.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_TOPLEVELCOMMENTSEPARATOR_H
+#define LLVM_CLANG_LIB_FORMAT_TOPLEVELCOMMENTSEPARATOR_H
+
+#include "TokenAnalyzer.h"
+#include "WhitespaceManager.h"
+
+namespace clang {
+namespace format {
+class TopLevelCommentSeparator : public TokenAnalyzer {
+public:
+  TopLevelCommentSeparator(const Environment &Env, const FormatStyle &Style)
+      : TokenAnalyzer(Env, Style) {}
+
+  std::pair<tooling::Replacements, unsigned>
+  analyze(TokenAnnotator &Annotator,
+          SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+          FormatTokenLexer &Tokens) override;
+
+private:
+  void separateTopLevelComment(SmallVectorImpl<AnnotatedLine *> &Lines,
+                               tooling::Replacements &Result,
+                               FormatTokenLexer &Tokens);
+};
+} // namespace format
+} // namespace clang
+
+#endif
diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp
index adeb072434873f..c512c919ef426b 100644
--- a/clang/lib/Format/UnwrappedLineFormatter.cpp
+++ b/clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -1535,6 +1535,12 @@ static auto computeNewlines(const AnnotatedLine &Line,
     }
   }
 
+  if (Style.EmptyLinesAfterIncludes.has_value() && !Line.InMacroBody &&
+      PreviousLine && PreviousLine->isInclude() && !Line.isInclude()) {
+    Newlines =
+        1 + std::min(Style.MaxEmptyLinesToKeep, *Style.EmptyLinesAfterIncludes);
+  }
+
   return Newlines;
 }
 
diff --git a/clang/unittests/Format/CMakeLists.txt b/clang/unittests/Format/CMakeLists.txt
index 71f5886d946c80..c24f5fecffc1fc 100644
--- a/clang/unittests/Format/CMakeLists.txt
+++ b/clang/unittests/Format/CMakeLists.txt
@@ -36,6 +36,7 @@ add_clang_unittest(FormatTests
   SortIncludesTest.cpp
   UsingDeclarationsSorterTest.cpp
   TokenAnnotatorTest.cpp
+  TopLevelCommentSeparatorTest.cpp
   )
 
 clang_target_link_libraries(FormatTests
diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp
index 172aaab5988ce5..7c9221ad12c8d4 100644
--- a/clang/unittests/Format/ConfigParseTest.cpp
+++ b/clang/unittests/Format/ConfigParseTest.cpp
@@ -1004,6 +1004,10 @@ TEST(ConfigParseTest, ParsesConfiguration) {
               FormatStyle::SDS_Leave);
   CHECK_PARSE("SeparateDefinitionBlocks: Never", SeparateDefinitionBlocks,
               FormatStyle::SDS_Never);
+
+  CHECK_PARSE("EmptyLinesAfterIncludes: 2", EmptyLinesAfterIncludes, 2);
+  CHECK_PARSE("EmptyLinesAfterTopLevelComment: 2",
+              EmptyLinesAfterTopLevelComment, 2);
 }
 
 TEST(ConfigParseTest, ParsesConfigurationWithLanguages) {
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index c229d9bc56def8..f83ef91dcfe2de 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -26846,6 +26846,26 @@ TEST_F(FormatTest, BreakAdjacentStringLiterals) {
   Style.BreakAdjacentStringLiterals = false;
   verifyFormat(Code, Style);
 }
+
+TEST_F(FormatTest, EmptyLinesAfterInclude) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2;
+  Style.MaxEmptyLinesToKeep = 2;
+  verifyFormat("#include <string>\n\n\n"
+               "class Test {};",
+               Style);
+
+  Style.EmptyLinesAfterIncludes = 1;
+  verifyFormat("#include <string>\n\n"
+               "class Test {};",
+               Style);
+
+  Style.EmptyLinesAfterIncludes = 2;
+  Style.MaxEmptyLinesToKeep = 1;
+  verifyFormat("#include <string>\n\n"
+               "class Test {};",
+               Style);
+}
 } // namespace
 } // namespace test
 } // namespace format
diff --git a/clang/unittests/Format/TopLevelCommentSeparatorTest.cpp b/clang/unittests/Format/TopLevelCommentSeparatorTest.cpp
new file mode 100644
index 00000000000000..6560819556457a
--- /dev/null
+++ b/clang/unittests/Format/TopLevelCommentSeparatorTest.cpp
@@ -0,0 +1,112 @@
+//===- unittest/Format/TopLevelCommentSeparatorTest.cpp - Formatting unit tests
+//-----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FormatTestBase.h"
+
+#define DEBUG_TYPE "top-level-comment-separator-test"
+
+namespace clang {
+namespace format {
+namespace test {
+namespace {
+
+class TopLevelCommentSeparatorTest : public FormatTestBase {};
+
+TEST_F(TopLevelCommentSeparatorTest, CheckEmptyLines) {
+  FormatStyle Style = getDefaultStyle();
+  Style.EmptyLinesAfterTopLevelComment = 2;
+  Style.MaxEmptyLinesToKeep = 2;
+  verifyFormat("// start license\n"
+               "// license text\n"
+               "// more license text\n"
+               "// end license\n\n\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("// start license\n"
+               "// license text\n"
+               "// more license text\n"
+               "// end license\n\n\n"
+               "static int test = 10;",
+               Style);
+
+  verifyFormat("// start license\n"
+               "// license text\n"
+               "// more license text\n"
+               "// end license\n\n\n"
+               "#include <iostream>",
+               Style);
+
+  verifyFormat("// start license\n"
+               "// license text\n"
+               "// more license text\n"
+               "// end license",
+               Style);
+
+  verifyFormat("/* top level comment */\n\n\n"
+               "#include <iostream>\n"
+               "class Test {\n"
+               "public:\n"
+               "  void test() {}\n"
+               "};\n"
+               "int main() {\n"
+               "  Test test;\n"
+               "  test.test();\n"
+               "  return 0;\n"
+               "}",
+               Style);
+
+  Style.EmptyLinesAfterTopLevelComment = 1;
+  verifyFormat("// start license\n"
+               "// license text\n"
+               "// more license text\n"
+               "// end license\n\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("// start license\n"
+               "// license text\n"
+               "// more license text\n"
+               "// end license\n\n"
+               "#include <iostream>",
+               Style);
+
+  verifyFormat("/* top level comment */\n\n"
+               "#include <iostream>\n"
+               "class Test {};",
+               Style);
+}
+
+TEST_F(TopLevelCommentSeparatorTest, LimitedByMaxEmptyLinesToKeep) {
+  FormatStyle Style = getDefaultStyle();
+  Style.EmptyLinesAfterTopLevelComment = 2;
+  Style.MaxEmptyLinesToKeep = 1;
+  verifyFormat("// start license\n"
+               "// license text\n"
+               "// more license text\n"
+               "// end license\n\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("// start license\n"
+               "// license text\n"
+               "// more license text\n"
+               "// end license\n\n"
+               "#include <iostream>",
+               Style);
+
+  verifyFormat("/* top level comment */\n\n"
+               "#include <iostream>\n"
+               "class Test {};",
+               Style);
+}
+} // namespace
+} // namespace test
+} // namespace format
+} // namespace clang



More information about the cfe-commits mailing list