[clang] [clang-format] Add options to set number of empty lines after includes (PR #78957)

via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 22 02:50:29 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-format

Author: serbanu (seranu)

<details>
<summary>Changes</summary>

Add an option, `EmptyLinesAfterIncludes`, to set the number of empty lines after include areas.

An include area is a list of consecutive include statements. An include area may be composed of multiple include blocks (groups of related include statements) and may contain conditional compilation statements. Although most source files have only one include area, some files have several.

`EmptyLinesAfterIncludes` sets the number of empty lines to keep after each include area; the value is limited by `MaxEmptyLinesToKeep`.

---
Full diff: https://github.com/llvm/llvm-project/pull/78957.diff


10 Files Affected:

- (modified) clang/docs/ClangFormatStyleOptions.rst (+19) 
- (modified) clang/docs/ReleaseNotes.rst (+1) 
- (modified) clang/include/clang/Format/Format.h (+18) 
- (modified) clang/lib/Format/CMakeLists.txt (+1) 
- (modified) clang/lib/Format/Format.cpp (+9) 
- (added) clang/lib/Format/IncludesSeparator.cpp (+160) 
- (added) clang/lib/Format/IncludesSeparator.h (+42) 
- (modified) clang/lib/Format/TokenAnnotator.h (+8) 
- (modified) clang/unittests/Format/ConfigParseTest.cpp (+2) 
- (modified) clang/unittests/Format/FormatTest.cpp (+185) 


``````````diff
diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index 4dc0de3a90f2650..4ba38808cd5090a 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -3220,6 +3220,25 @@ the configuration (without a prefix: ``Auto``).
 
 
 
+.. _EmptyLinesAfterIncludes:
+
+**EmptyLinesAfterIncludes** (``Unsigned``) :versionbadge:`clang-format 18` :ref:`¶ <EmptyLinesAfterIncludes>`
+  Number of lines after each include area. An include area is
+  a list of consecutive include statements. The include area may be
+  composed of multiple include blocks.
+  Limited by MaxEmptyLinesToKeep.
+  Example:
+
+  .. code-block:: c++
+
+
+     EmptyLinesAfterIncludes: 1  vs.  EmptyLinesAfterIncludes: 2
+     #include <string>                #include <string>
+     #include <map>                   #include <map>
+
+     class Test {};
+                                      class Test {};
+
 .. _ExperimentalAutoDetectBinPacking:
 
 **ExperimentalAutoDetectBinPacking** (``Boolean``) :versionbadge:`clang-format 3.7` :ref:`¶ <ExperimentalAutoDetectBinPacking>`
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4888ffe6f4dfc85..e1bf86c8a83b671 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1260,6 +1260,7 @@ clang-format
 - Add ``.clang-format-ignore`` files.
 - Add ``AlignFunctionPointers`` sub-option for ``AlignConsecutiveDeclarations``.
 - Add ``SkipMacroDefinitionBody`` option.
+- Add ``EmptyLinesAfterIncludes`` option.
 
 libclang
 --------
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index bc9eecd42f9ebfd..84d1a0b70b9efd2 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -2459,6 +2459,23 @@ struct FormatStyle {
   /// \version 12
   EmptyLineBeforeAccessModifierStyle EmptyLineBeforeAccessModifier;
 
+  /// \brief Number of lines after each include area. An include area is
+  /// a list of consecutive include statements. The include area may be
+  /// composed of multiple include blocks.
+  /// Limited by MaxEmptyLinesToKeep.
+  /// Example:
+  /// \code
+  ///
+  ///    EmptyLinesAfterIncludes: 1  vs.  EmptyLinesAfterIncludes: 2
+  ///    #include <string>                #include <string>
+  ///    #include <map>                   #include <map>
+  ///
+  ///    class Test {};
+  ///                                     class Test {};
+  /// \endcode
+  /// \version 18
+  std::optional<unsigned> EmptyLinesAfterIncludes;
+
   /// If ``true``, clang-format detects whether function calls and
   /// definitions are formatted with one parameter per line.
   ///
@@ -4831,6 +4848,7 @@ struct FormatStyle {
            DerivePointerAlignment == R.DerivePointerAlignment &&
            DisableFormat == R.DisableFormat &&
            EmptyLineAfterAccessModifier == R.EmptyLineAfterAccessModifier &&
+           EmptyLinesAfterIncludes == R.EmptyLinesAfterIncludes &&
            EmptyLineBeforeAccessModifier == R.EmptyLineBeforeAccessModifier &&
            ExperimentalAutoDetectBinPacking ==
                R.ExperimentalAutoDetectBinPacking &&
diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt
index 84a3c136f650a85..ff3860426407adc 100644
--- a/clang/lib/Format/CMakeLists.txt
+++ b/clang/lib/Format/CMakeLists.txt
@@ -8,6 +8,7 @@ add_clang_library(clangFormat
   Format.cpp
   FormatToken.cpp
   FormatTokenLexer.cpp
+  IncludesSeparator.cpp
   IntegerLiteralSeparatorFixer.cpp
   MacroCallReconstructor.cpp
   MacroExpander.cpp
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index ff326dc784783b2..f068da97e6dfbff 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -20,6 +20,7 @@
 #include "FormatInternal.h"
 #include "FormatToken.h"
 #include "FormatTokenLexer.h"
+#include "IncludesSeparator.h"
 #include "IntegerLiteralSeparatorFixer.h"
 #include "NamespaceEndCommentsFixer.h"
 #include "ObjCPropertyAttributeOrderFixer.h"
@@ -995,6 +996,7 @@ template <> struct MappingTraits<FormatStyle> {
     IO.mapOptional("DisableFormat", Style.DisableFormat);
     IO.mapOptional("EmptyLineAfterAccessModifier",
                    Style.EmptyLineAfterAccessModifier);
+    IO.mapOptional("EmptyLinesAfterIncludes", Style.EmptyLinesAfterIncludes);
     IO.mapOptional("EmptyLineBeforeAccessModifier",
                    Style.EmptyLineBeforeAccessModifier);
     IO.mapOptional("ExperimentalAutoDetectBinPacking",
@@ -1502,6 +1504,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
   LLVMStyle.DerivePointerAlignment = false;
   LLVMStyle.DisableFormat = false;
   LLVMStyle.EmptyLineAfterAccessModifier = FormatStyle::ELAAMS_Never;
+  LLVMStyle.EmptyLinesAfterIncludes = std::nullopt;
   LLVMStyle.EmptyLineBeforeAccessModifier = FormatStyle::ELBAMS_LogicalBlock;
   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
   LLVMStyle.FixNamespaceComments = true;
@@ -3715,6 +3718,12 @@ reformat(const FormatStyle &Style, StringRef Code,
     });
   }
 
+  if (Style.EmptyLinesAfterIncludes.has_value()) {
+    Passes.emplace_back([&](const Environment &Env) {
+      return IncludesSeparator(Env, Expanded).process();
+    });
+  }
+
   if (Style.Language == FormatStyle::LK_ObjC &&
       !Style.ObjCPropertyAttributeOrder.empty()) {
     Passes.emplace_back([&](const Environment &Env) {
diff --git a/clang/lib/Format/IncludesSeparator.cpp b/clang/lib/Format/IncludesSeparator.cpp
new file mode 100644
index 000000000000000..7aad7b9d3695ff5
--- /dev/null
+++ b/clang/lib/Format/IncludesSeparator.cpp
@@ -0,0 +1,160 @@
+//===--- IncludesSeparator.cpp ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements IncludesSeparator, a TokenAnalyzer that inserts
+/// new lines or removes empty lines after an include area.
+/// An includes area is a list of consecutive include statements.
+///
+//===----------------------------------------------------------------------===//
+
+#include "IncludesSeparator.h"
+#include "TokenAnnotator.h"
+#define DEBUG_TYPE "includes-separator"
+
+namespace {
+bool isConditionalCompilationStart(const clang::format::AnnotatedLine &Line) {
+  if (!Line.First)
+    return false;
+  const auto *NextToken = Line.First->getNextNonComment();
+  return Line.First->is(clang::tok::hash) && NextToken &&
+         NextToken->isOneOf(clang::tok::pp_if, clang::tok::pp_ifdef,
+                            clang::tok::pp_ifndef, clang::tok::pp_defined);
+}
+
+bool isConditionalCompilationEnd(const clang::format::AnnotatedLine &Line) {
+  if (!Line.First)
+    return false;
+  const auto *NextToken = Line.First->getNextNonComment();
+  return Line.First->is(clang::tok::hash) && NextToken &&
+         NextToken->is(clang::tok::pp_endif);
+}
+
+bool isConditionalCompilationStatement(
+    const clang::format::AnnotatedLine &Line) {
+  if (!Line.First)
+    return false;
+  const auto *NextToken = Line.First->getNextNonComment();
+  return Line.First->is(clang::tok::hash) && NextToken &&
+         NextToken->isOneOf(clang::tok::pp_if, clang::tok::pp_ifdef,
+                            clang::tok::pp_ifndef, clang::tok::pp_elif,
+                            clang::tok::pp_elifdef, clang::tok::pp_elifndef,
+                            clang::tok::pp_else, clang::tok::pp_defined,
+                            clang::tok::pp_endif);
+}
+
+bool isCCOnlyWithIncludes(
+    const llvm::SmallVectorImpl<clang::format::AnnotatedLine *> &Lines,
+    unsigned StartIdx) {
+  int CCLevel = 0;
+  for (unsigned I = StartIdx; I < Lines.size(); ++I) {
+    const auto &CurrentLine = *Lines[I];
+    if (isConditionalCompilationStart(CurrentLine))
+      CCLevel++;
+
+    if (isConditionalCompilationEnd(CurrentLine))
+      CCLevel--;
+
+    if (CCLevel == 0)
+      break;
+
+    if (!(CurrentLine.isInclude() ||
+          isConditionalCompilationStatement(CurrentLine))) {
+      return false;
+    }
+  }
+  return true;
+}
+
+unsigned getEndOfCCBlock(
+    const llvm::SmallVectorImpl<clang::format::AnnotatedLine *> &Lines,
+    unsigned StartIdx) {
+  int CCLevel = 0;
+  unsigned I = StartIdx;
+  for (; I < Lines.size(); ++I) {
+    const auto &CurrentLine = *Lines[I];
+    if (isConditionalCompilationStart(CurrentLine))
+      CCLevel++;
+
+    if (isConditionalCompilationEnd(CurrentLine))
+      CCLevel--;
+
+    if (CCLevel == 0)
+      break;
+  }
+  return I;
+}
+} // namespace
+
+namespace clang {
+namespace format {
+std::pair<tooling::Replacements, unsigned>
+IncludesSeparator::analyze(TokenAnnotator &Annotator,
+                           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+                           FormatTokenLexer &Tokens) {
+  assert(Style.EmptyLinesAfterIncludes.has_value());
+  AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
+  tooling::Replacements Result;
+  separateIncludes(AnnotatedLines, Result, Tokens);
+  return {Result, 0};
+}
+
+void IncludesSeparator::separateIncludes(
+    SmallVectorImpl<AnnotatedLine *> &Lines, tooling::Replacements &Result,
+    FormatTokenLexer &Tokens) {
+  const unsigned NewlineCount =
+      std::min(Style.MaxEmptyLinesToKeep, *Style.EmptyLinesAfterIncludes) + 1;
+  WhitespaceManager Whitespaces(
+      Env.getSourceManager(), Style,
+      Style.LineEnding > FormatStyle::LE_CRLF
+          ? WhitespaceManager::inputUsesCRLF(
+                Env.getSourceManager().getBufferData(Env.getFileID()),
+                Style.LineEnding == FormatStyle::LE_DeriveCRLF)
+          : Style.LineEnding == FormatStyle::LE_CRLF);
+
+  bool InIncludeArea = false;
+  for (unsigned I = 0; I < Lines.size(); ++I) {
+    const auto &CurrentLine = *Lines[I];
+
+    if (InIncludeArea) {
+      if (CurrentLine.isInclude())
+        continue;
+
+      if (isConditionalCompilationStart(CurrentLine)) {
+        const bool CCWithOnlyIncludes = isCCOnlyWithIncludes(Lines, I);
+        I = getEndOfCCBlock(Lines, I);
+
+        // Conditional compilation blocks that only contain
+        // include statements are considered part of the include area.
+        if (CCWithOnlyIncludes)
+          continue;
+      }
+
+      if (!CurrentLine.First->is(tok::eof) && CurrentLine.Affected) {
+        Whitespaces.replaceWhitespace(*CurrentLine.First, NewlineCount,
+                                      CurrentLine.First->OriginalColumn,
+                                      CurrentLine.First->OriginalColumn);
+      }
+      InIncludeArea = false;
+    } else {
+      if (CurrentLine.isInclude())
+        InIncludeArea = true;
+    }
+  }
+
+  for (const auto &R : Whitespaces.generateReplacements()) {
+    // The add method returns an Error instance which simulates program exit
+    // code through overloading boolean operator, thus false here indicates
+    // success.
+    if (Result.add(R))
+      return;
+  }
+}
+} // namespace format
+} // namespace clang
+                    
\ No newline at end of file
diff --git a/clang/lib/Format/IncludesSeparator.h b/clang/lib/Format/IncludesSeparator.h
new file mode 100644
index 000000000000000..d093e24dbf9d412
--- /dev/null
+++ b/clang/lib/Format/IncludesSeparator.h
@@ -0,0 +1,42 @@
+//===--- IncludesSeparator.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares IncludesSeparator, a TokenAnalyzer that inserts
+/// new lines or removes empty lines after an includes area.
+/// An includes area is a list of consecutive include statements.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_INCLUDESSEPARATOR_H
+#define LLVM_CLANG_LIB_FORMAT_INCLUDESSEPARATOR_H
+
+#include "TokenAnalyzer.h"
+#include "WhitespaceManager.h"
+
+namespace clang {
+namespace format {
+class IncludesSeparator : public TokenAnalyzer {
+public:
+  IncludesSeparator(const Environment &Env, const FormatStyle &Style)
+      : TokenAnalyzer(Env, Style) {}
+
+  std::pair<tooling::Replacements, unsigned>
+  analyze(TokenAnnotator &Annotator,
+          SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+          FormatTokenLexer &Tokens) override;
+
+private:
+  void separateIncludes(SmallVectorImpl<AnnotatedLine *> &Lines,
+                        tooling::Replacements &Result,
+                        FormatTokenLexer &Tokens);
+};
+} // namespace format
+} // namespace clang
+
+#endif
diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h
index 05a6daa87d80340..06486799ec4031a 100644
--- a/clang/lib/Format/TokenAnnotator.h
+++ b/clang/lib/Format/TokenAnnotator.h
@@ -113,6 +113,14 @@ class AnnotatedLine {
     return First && First->is(tok::comment) && !First->getNextNonComment();
   }
 
+  bool isInclude() const {
+    if (!First)
+      return false;
+
+    const auto *NextToken = First->getNextNonComment();
+    return First->is(tok::hash) && NextToken && NextToken->is(tok::pp_include);
+  }
+
   /// \c true if this line starts with the given tokens in order, ignoring
   /// comments.
   template <typename... Ts> bool startsWith(Ts... Tokens) const {
diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp
index 2a8d79359a49b40..cbb442203b1628a 100644
--- a/clang/unittests/Format/ConfigParseTest.cpp
+++ b/clang/unittests/Format/ConfigParseTest.cpp
@@ -1005,6 +1005,8 @@ TEST(ConfigParseTest, ParsesConfiguration) {
               FormatStyle::SDS_Leave);
   CHECK_PARSE("SeparateDefinitionBlocks: Never", SeparateDefinitionBlocks,
               FormatStyle::SDS_Never);
+
+  CHECK_PARSE("EmptyLinesAfterIncludes: 2", EmptyLinesAfterIncludes, 2);
 }
 
 TEST(ConfigParseTest, ParsesConfigurationWithLanguages) {
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 3fb55ae2c1f4137..c2a280f003c70f9 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -26990,6 +26990,191 @@ TEST_F(FormatTest, BreakAdjacentStringLiterals) {
   Style.BreakAdjacentStringLiterals = false;
   verifyFormat(Code, Style);
 }
+
+TEST_F(FormatTest, EmptyLinesAfterInclude) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2;
+  Style.MaxEmptyLinesToKeep = 2;
+
+  verifyFormat("#include <string>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#ifndef TEST_H\n"
+               "#define TEST_H\n"
+               "#include <string>\n"
+               "\n"
+               "\n"
+               "#define PP_DEFINE\n"
+               "#include <map>\n"
+               "#include <vector>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#include <string>\n"
+               "#ifdef WINDOWS\n"
+               "#include <win32>\n"
+               "#ifdef X32\n"
+               "#include <additionalHeader>\n"
+               "#else\n"
+               "#include <unistd.h>\n"
+               "#endif\n"
+               "#endif\n"
+               "#include <map>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#pragma once\n"
+               "#include <string>\n"
+               "#include <map>\n"
+               "#ifdef WINDOWS\n"
+               "#include <w32>\n"
+               "#endif\n"
+               "#include <vector>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#include <string>\n"
+               "\n"
+               "\n"
+               "#ifdef WINDOWS\n"
+               "#define OS_VERSION WINDOWS\n"
+               "#endif\n"
+               "\n"
+               "#include <map>\n"
+               "#include <vector>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#include <string>\n"
+               "#include <map>\n"
+               "\n"
+               "\n"
+               "#define INCLUDE_MACRO #include<vector> #include<set>\n"
+               "#include <queue>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#include <string>\n"
+               "\n"
+               "\n"
+               "#ifdef WINDOWS\n"
+               "#ifdef x86\n"
+               "#include <x86_windows>\n"
+               "#endif\n"
+               "#define OS_VERSION WINDOWS\n"
+               "#endif\n"
+               "\n"
+               "#include <map>\n"
+               "#include <vector>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  Style.EmptyLinesAfterIncludes = 1;
+  verifyFormat("#include <string>\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#pragma once\n"
+               "#include <string>\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#ifndef TEST_H\n"
+               "#define TEST_H\n"
+               "#include <string>\n"
+               "#include <map>\n"
+               "\n"
+               "void func();\n"
+               "#endif // TEST_H",
+               Style);
+}
+
+TEST_F(FormatTest, EmptyLinesAfterIncludesLimitedByMaxEmptyLinesToKeep) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2;
+  Style.MaxEmptyLinesToKeep = 1;
+  verifyFormat("#include <string>\n"
+               "\n"
+               "class Test {};",
+               Style);
+}
+
+TEST_F(FormatTest, EmptyLinesAfterIncludesWithIncludesPreseve) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2;
+  Style.MaxEmptyLinesToKeep = 2;
+  Style.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Preserve;
+  verifyFormat("#pragma once\n"
+               "// test file documentation\n"
+               "#include \"b.h\"\n"
+               "#include \"d.h\"\n"
+               "\n"
+               "#include \"a.h\"\n"
+               "#include \"c.h\"\n"
+               "#include \"e.h\"\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+}
+
+TEST_F(FormatTest, EmptyLinesAfterIncludesWithIncludesMerge) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2;
+  Style.MaxEmptyLinesToKeep = 2;
+  Style.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Merge;
+  verifyFormat("#pragma once\n"
+               "// test file documentation\n"
+               "#include \"a.h\"\n"
+               "#include \"b.h\"\n"
+               "#include \"c.h\"\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+}
+
+TEST_F(FormatTest, EmptyLinesAfterIncludesWithIncludesRegroup) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2;
+  Style.MaxEmptyLinesToKeep = 2;
+  Style.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Regroup;
+  verifyFormat("#pragma once\n"
+               "// test file documentation\n"
+               "#include \"a.h\"\n"
+               "#include \"c.h\"\n"
+               "\n"
+               "#include <b.h>\n"
+               "#include <d.h>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#include <b.h>\n"
+               "#include <d.h>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+}
 } // namespace
 } // namespace test
 } // namespace format

``````````

</details>


https://github.com/llvm/llvm-project/pull/78957


More information about the cfe-commits mailing list