[clang] [clang-format] Add options to set number of empty lines after includes (PR #78957)

via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 22 02:50:00 PST 2024


https://github.com/seranu created https://github.com/llvm/llvm-project/pull/78957

Add an option to set the number of empty lines after include areas.

An include area is a list of consecutive include statements. Include areas may be composed of multiple include blocks (groups of related include statements) and may contain conditional compilation statements. Although in most cases source files have only one include area, there may be cases with several include areas.

EmptyLinesAfterIncludes can be used to determine the number of empty lines to keep after each include area.

>From 351995614fb6155bcb621860f18d97f727ece101 Mon Sep 17 00:00:00 2001
From: Serban Ungureanu <serban.ungureanu at randstaddigital.com>
Date: Sat, 20 Jan 2024 17:02:04 +0200
Subject: [PATCH] [clang-format] Add options to set number of empty lines after
 includes

---
 clang/docs/ClangFormatStyleOptions.rst     |  19 +++
 clang/docs/ReleaseNotes.rst                |   1 +
 clang/include/clang/Format/Format.h        |  18 ++
 clang/lib/Format/CMakeLists.txt            |   1 +
 clang/lib/Format/Format.cpp                |   9 +
 clang/lib/Format/IncludesSeparator.cpp     | 160 ++++++++++++++++++
 clang/lib/Format/IncludesSeparator.h       |  42 +++++
 clang/lib/Format/TokenAnnotator.h          |   8 +
 clang/unittests/Format/ConfigParseTest.cpp |   2 +
 clang/unittests/Format/FormatTest.cpp      | 185 +++++++++++++++++++++
 10 files changed, 445 insertions(+)
 create mode 100644 clang/lib/Format/IncludesSeparator.cpp
 create mode 100644 clang/lib/Format/IncludesSeparator.h

diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index 4dc0de3a90f265..4ba38808cd5090 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -3220,6 +3220,25 @@ the configuration (without a prefix: ``Auto``).
 
 
 
+.. _EmptyLinesAfterIncludes:
+
+**EmptyLinesAfterIncludes** (``Unsigned``) :versionbadge:`clang-format 18` :ref:`¶ <EmptyLinesAfterIncludes>`
+  Number of lines after each include area. An include area is
+  a list of consecutive include statements. The include area may be
+  composed of multiple include blocks.
+  Limited by MaxEmptyLinesToKeep.
+  Example:
+
+  .. code-block:: c++
+
+
+     EmptyLinesAfterIncludes: 1  vs.  EmptyLinesAfterIncludes: 2
+     #include <string>                #include <string>
+     #include <map>                   #include <map>
+
+     class Test {};
+                                      class Test {};
+
 .. _ExperimentalAutoDetectBinPacking:
 
 **ExperimentalAutoDetectBinPacking** (``Boolean``) :versionbadge:`clang-format 3.7` :ref:`¶ <ExperimentalAutoDetectBinPacking>`
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4888ffe6f4dfc8..e1bf86c8a83b67 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1260,6 +1260,7 @@ clang-format
 - Add ``.clang-format-ignore`` files.
 - Add ``AlignFunctionPointers`` sub-option for ``AlignConsecutiveDeclarations``.
 - Add ``SkipMacroDefinitionBody`` option.
+- Add ``EmptyLinesAfterIncludes`` option.
 
 libclang
 --------
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index bc9eecd42f9ebf..84d1a0b70b9efd 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -2459,6 +2459,23 @@ struct FormatStyle {
   /// \version 12
   EmptyLineBeforeAccessModifierStyle EmptyLineBeforeAccessModifier;
 
+  /// \brief Number of lines after each include area. An include area is
+  /// a list of consecutive include statements. The include area may be
+  /// composed of multiple include blocks.
+  /// Limited by MaxEmptyLinesToKeep.
+  /// Example:
+  /// \code
+  ///
+  ///    EmptyLinesAfterIncludes: 1  vs.  EmptyLinesAfterIncludes: 2
+  ///    #include <string>                #include <string>
+  ///    #include <map>                   #include <map>
+  ///
+  ///    class Test {};
+  ///                                     class Test {};
+  /// \endcode
+  /// \version 18
+  std::optional<unsigned> EmptyLinesAfterIncludes;
+
   /// If ``true``, clang-format detects whether function calls and
   /// definitions are formatted with one parameter per line.
   ///
@@ -4831,6 +4848,7 @@ struct FormatStyle {
            DerivePointerAlignment == R.DerivePointerAlignment &&
            DisableFormat == R.DisableFormat &&
            EmptyLineAfterAccessModifier == R.EmptyLineAfterAccessModifier &&
+           EmptyLinesAfterIncludes == R.EmptyLinesAfterIncludes &&
            EmptyLineBeforeAccessModifier == R.EmptyLineBeforeAccessModifier &&
            ExperimentalAutoDetectBinPacking ==
                R.ExperimentalAutoDetectBinPacking &&
diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt
index 84a3c136f650a8..ff3860426407ad 100644
--- a/clang/lib/Format/CMakeLists.txt
+++ b/clang/lib/Format/CMakeLists.txt
@@ -8,6 +8,7 @@ add_clang_library(clangFormat
   Format.cpp
   FormatToken.cpp
   FormatTokenLexer.cpp
+  IncludesSeparator.cpp
   IntegerLiteralSeparatorFixer.cpp
   MacroCallReconstructor.cpp
   MacroExpander.cpp
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index ff326dc784783b..f068da97e6dfbf 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -20,6 +20,7 @@
 #include "FormatInternal.h"
 #include "FormatToken.h"
 #include "FormatTokenLexer.h"
+#include "IncludesSeparator.h"
 #include "IntegerLiteralSeparatorFixer.h"
 #include "NamespaceEndCommentsFixer.h"
 #include "ObjCPropertyAttributeOrderFixer.h"
@@ -995,6 +996,7 @@ template <> struct MappingTraits<FormatStyle> {
     IO.mapOptional("DisableFormat", Style.DisableFormat);
     IO.mapOptional("EmptyLineAfterAccessModifier",
                    Style.EmptyLineAfterAccessModifier);
+    IO.mapOptional("EmptyLinesAfterIncludes", Style.EmptyLinesAfterIncludes);
     IO.mapOptional("EmptyLineBeforeAccessModifier",
                    Style.EmptyLineBeforeAccessModifier);
     IO.mapOptional("ExperimentalAutoDetectBinPacking",
@@ -1502,6 +1504,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
   LLVMStyle.DerivePointerAlignment = false;
   LLVMStyle.DisableFormat = false;
   LLVMStyle.EmptyLineAfterAccessModifier = FormatStyle::ELAAMS_Never;
+  LLVMStyle.EmptyLinesAfterIncludes = std::nullopt;
   LLVMStyle.EmptyLineBeforeAccessModifier = FormatStyle::ELBAMS_LogicalBlock;
   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
   LLVMStyle.FixNamespaceComments = true;
@@ -3715,6 +3718,12 @@ reformat(const FormatStyle &Style, StringRef Code,
     });
   }
 
+  if (Style.EmptyLinesAfterIncludes.has_value()) {
+    Passes.emplace_back([&](const Environment &Env) {
+      return IncludesSeparator(Env, Expanded).process();
+    });
+  }
+
   if (Style.Language == FormatStyle::LK_ObjC &&
       !Style.ObjCPropertyAttributeOrder.empty()) {
     Passes.emplace_back([&](const Environment &Env) {
diff --git a/clang/lib/Format/IncludesSeparator.cpp b/clang/lib/Format/IncludesSeparator.cpp
new file mode 100644
index 00000000000000..7aad7b9d3695ff
--- /dev/null
+++ b/clang/lib/Format/IncludesSeparator.cpp
@@ -0,0 +1,160 @@
+//===--- IncludesSeparator.cpp ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements IncludesSeparator, a TokenAnalyzer that inserts
+/// new lines or removes empty lines after an include area.
+/// An includes area is a list of consecutive include statements.
+///
+//===----------------------------------------------------------------------===//
+
+#include "IncludesSeparator.h"
+#include "TokenAnnotator.h"
+#define DEBUG_TYPE "includes-separator"
+
+namespace {
+bool isConditionalCompilationStart(const clang::format::AnnotatedLine &Line) {
+  if (!Line.First)
+    return false;
+  const auto *NextToken = Line.First->getNextNonComment();
+  return Line.First->is(clang::tok::hash) && NextToken &&
+         NextToken->isOneOf(clang::tok::pp_if, clang::tok::pp_ifdef,
+                            clang::tok::pp_ifndef, clang::tok::pp_defined);
+}
+
+bool isConditionalCompilationEnd(const clang::format::AnnotatedLine &Line) {
+  if (!Line.First)
+    return false;
+  const auto *NextToken = Line.First->getNextNonComment();
+  return Line.First->is(clang::tok::hash) && NextToken &&
+         NextToken->is(clang::tok::pp_endif);
+}
+
+bool isConditionalCompilationStatement(
+    const clang::format::AnnotatedLine &Line) {
+  if (!Line.First)
+    return false;
+  const auto *NextToken = Line.First->getNextNonComment();
+  return Line.First->is(clang::tok::hash) && NextToken &&
+         NextToken->isOneOf(clang::tok::pp_if, clang::tok::pp_ifdef,
+                            clang::tok::pp_ifndef, clang::tok::pp_elif,
+                            clang::tok::pp_elifdef, clang::tok::pp_elifndef,
+                            clang::tok::pp_else, clang::tok::pp_defined,
+                            clang::tok::pp_endif);
+}
+
+bool isCCOnlyWithIncludes(
+    const llvm::SmallVectorImpl<clang::format::AnnotatedLine *> &Lines,
+    unsigned StartIdx) {
+  int CCLevel = 0;
+  for (unsigned I = StartIdx; I < Lines.size(); ++I) {
+    const auto &CurrentLine = *Lines[I];
+    if (isConditionalCompilationStart(CurrentLine))
+      CCLevel++;
+
+    if (isConditionalCompilationEnd(CurrentLine))
+      CCLevel--;
+
+    if (CCLevel == 0)
+      break;
+
+    if (!(CurrentLine.isInclude() ||
+          isConditionalCompilationStatement(CurrentLine))) {
+      return false;
+    }
+  }
+  return true;
+}
+
+unsigned getEndOfCCBlock(
+    const llvm::SmallVectorImpl<clang::format::AnnotatedLine *> &Lines,
+    unsigned StartIdx) {
+  int CCLevel = 0;
+  unsigned I = StartIdx;
+  for (; I < Lines.size(); ++I) {
+    const auto &CurrentLine = *Lines[I];
+    if (isConditionalCompilationStart(CurrentLine))
+      CCLevel++;
+
+    if (isConditionalCompilationEnd(CurrentLine))
+      CCLevel--;
+
+    if (CCLevel == 0)
+      break;
+  }
+  return I;
+}
+} // namespace
+
+namespace clang {
+namespace format {
+std::pair<tooling::Replacements, unsigned>
+IncludesSeparator::analyze(TokenAnnotator &Annotator,
+                           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+                           FormatTokenLexer &Tokens) {
+  assert(Style.EmptyLinesAfterIncludes.has_value());
+  AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
+  tooling::Replacements Result;
+  separateIncludes(AnnotatedLines, Result, Tokens);
+  return {Result, 0};
+}
+
+void IncludesSeparator::separateIncludes(
+    SmallVectorImpl<AnnotatedLine *> &Lines, tooling::Replacements &Result,
+    FormatTokenLexer &Tokens) {
+  const unsigned NewlineCount =
+      std::min(Style.MaxEmptyLinesToKeep, *Style.EmptyLinesAfterIncludes) + 1;
+  WhitespaceManager Whitespaces(
+      Env.getSourceManager(), Style,
+      Style.LineEnding > FormatStyle::LE_CRLF
+          ? WhitespaceManager::inputUsesCRLF(
+                Env.getSourceManager().getBufferData(Env.getFileID()),
+                Style.LineEnding == FormatStyle::LE_DeriveCRLF)
+          : Style.LineEnding == FormatStyle::LE_CRLF);
+
+  bool InIncludeArea = false;
+  for (unsigned I = 0; I < Lines.size(); ++I) {
+    const auto &CurrentLine = *Lines[I];
+
+    if (InIncludeArea) {
+      if (CurrentLine.isInclude())
+        continue;
+
+      if (isConditionalCompilationStart(CurrentLine)) {
+        const bool CCWithOnlyIncludes = isCCOnlyWithIncludes(Lines, I);
+        I = getEndOfCCBlock(Lines, I);
+
+        // Conditional compilation blocks that only contain
+        // include statements are considered part of the include area.
+        if (CCWithOnlyIncludes)
+          continue;
+      }
+
+      if (!CurrentLine.First->is(tok::eof) && CurrentLine.Affected) {
+        Whitespaces.replaceWhitespace(*CurrentLine.First, NewlineCount,
+                                      CurrentLine.First->OriginalColumn,
+                                      CurrentLine.First->OriginalColumn);
+      }
+      InIncludeArea = false;
+    } else {
+      if (CurrentLine.isInclude())
+        InIncludeArea = true;
+    }
+  }
+
+  for (const auto &R : Whitespaces.generateReplacements()) {
+    // The add method returns an Error instance which simulates program exit
+    // code through overloading boolean operator, thus false here indicates
+    // success.
+    if (Result.add(R))
+      return;
+  }
+}
+} // namespace format
+} // namespace clang
+                    
\ No newline at end of file
diff --git a/clang/lib/Format/IncludesSeparator.h b/clang/lib/Format/IncludesSeparator.h
new file mode 100644
index 00000000000000..d093e24dbf9d41
--- /dev/null
+++ b/clang/lib/Format/IncludesSeparator.h
@@ -0,0 +1,42 @@
+//===--- IncludesSeparator.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares IncludesSeparator, a TokenAnalyzer that inserts
+/// new lines or removes empty lines after an includes area.
+/// An includes area is a list of consecutive include statements.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_INCLUDESSEPARATOR_H
+#define LLVM_CLANG_LIB_FORMAT_INCLUDESSEPARATOR_H
+
+#include "TokenAnalyzer.h"
+#include "WhitespaceManager.h"
+
+namespace clang {
+namespace format {
+class IncludesSeparator : public TokenAnalyzer {
+public:
+  IncludesSeparator(const Environment &Env, const FormatStyle &Style)
+      : TokenAnalyzer(Env, Style) {}
+
+  std::pair<tooling::Replacements, unsigned>
+  analyze(TokenAnnotator &Annotator,
+          SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+          FormatTokenLexer &Tokens) override;
+
+private:
+  void separateIncludes(SmallVectorImpl<AnnotatedLine *> &Lines,
+                        tooling::Replacements &Result,
+                        FormatTokenLexer &Tokens);
+};
+} // namespace format
+} // namespace clang
+
+#endif
diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h
index 05a6daa87d8034..06486799ec4031 100644
--- a/clang/lib/Format/TokenAnnotator.h
+++ b/clang/lib/Format/TokenAnnotator.h
@@ -113,6 +113,14 @@ class AnnotatedLine {
     return First && First->is(tok::comment) && !First->getNextNonComment();
   }
 
+  bool isInclude() const {
+    if (!First)
+      return false;
+
+    const auto *NextToken = First->getNextNonComment();
+    return First->is(tok::hash) && NextToken && NextToken->is(tok::pp_include);
+  }
+
   /// \c true if this line starts with the given tokens in order, ignoring
   /// comments.
   template <typename... Ts> bool startsWith(Ts... Tokens) const {
diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp
index 2a8d79359a49b4..cbb442203b1628 100644
--- a/clang/unittests/Format/ConfigParseTest.cpp
+++ b/clang/unittests/Format/ConfigParseTest.cpp
@@ -1005,6 +1005,8 @@ TEST(ConfigParseTest, ParsesConfiguration) {
               FormatStyle::SDS_Leave);
   CHECK_PARSE("SeparateDefinitionBlocks: Never", SeparateDefinitionBlocks,
               FormatStyle::SDS_Never);
+
+  CHECK_PARSE("EmptyLinesAfterIncludes: 2", EmptyLinesAfterIncludes, 2);
 }
 
 TEST(ConfigParseTest, ParsesConfigurationWithLanguages) {
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 3fb55ae2c1f413..c2a280f003c70f 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -26990,6 +26990,191 @@ TEST_F(FormatTest, BreakAdjacentStringLiterals) {
   Style.BreakAdjacentStringLiterals = false;
   verifyFormat(Code, Style);
 }
+
+TEST_F(FormatTest, EmptyLinesAfterInclude) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2;
+  Style.MaxEmptyLinesToKeep = 2;
+
+  verifyFormat("#include <string>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#ifndef TEST_H\n"
+               "#define TEST_H\n"
+               "#include <string>\n"
+               "\n"
+               "\n"
+               "#define PP_DEFINE\n"
+               "#include <map>\n"
+               "#include <vector>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#include <string>\n"
+               "#ifdef WINDOWS\n"
+               "#include <win32>\n"
+               "#ifdef X32\n"
+               "#include <additionalHeader>\n"
+               "#else\n"
+               "#include <unistd.h>\n"
+               "#endif\n"
+               "#endif\n"
+               "#include <map>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#pragma once\n"
+               "#include <string>\n"
+               "#include <map>\n"
+               "#ifdef WINDOWS\n"
+               "#include <w32>\n"
+               "#endif\n"
+               "#include <vector>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#include <string>\n"
+               "\n"
+               "\n"
+               "#ifdef WINDOWS\n"
+               "#define OS_VERSION WINDOWS\n"
+               "#endif\n"
+               "\n"
+               "#include <map>\n"
+               "#include <vector>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#include <string>\n"
+               "#include <map>\n"
+               "\n"
+               "\n"
+               "#define INCLUDE_MACRO #include<vector> #include<set>\n"
+               "#include <queue>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#include <string>\n"
+               "\n"
+               "\n"
+               "#ifdef WINDOWS\n"
+               "#ifdef x86\n"
+               "#include <x86_windows>\n"
+               "#endif\n"
+               "#define OS_VERSION WINDOWS\n"
+               "#endif\n"
+               "\n"
+               "#include <map>\n"
+               "#include <vector>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  Style.EmptyLinesAfterIncludes = 1;
+  verifyFormat("#include <string>\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#pragma once\n"
+               "#include <string>\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#ifndef TEST_H\n"
+               "#define TEST_H\n"
+               "#include <string>\n"
+               "#include <map>\n"
+               "\n"
+               "void func();\n"
+               "#endif // TEST_H",
+               Style);
+}
+
+TEST_F(FormatTest, EmptyLinesAfterIncludesLimitedByMaxEmptyLinesToKeep) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2;
+  Style.MaxEmptyLinesToKeep = 1;
+  verifyFormat("#include <string>\n"
+               "\n"
+               "class Test {};",
+               Style);
+}
+
+TEST_F(FormatTest, EmptyLinesAfterIncludesWithIncludesPreseve) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2;
+  Style.MaxEmptyLinesToKeep = 2;
+  Style.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Preserve;
+  verifyFormat("#pragma once\n"
+               "// test file documentation\n"
+               "#include \"b.h\"\n"
+               "#include \"d.h\"\n"
+               "\n"
+               "#include \"a.h\"\n"
+               "#include \"c.h\"\n"
+               "#include \"e.h\"\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+}
+
+TEST_F(FormatTest, EmptyLinesAfterIncludesWithIncludesMerge) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2;
+  Style.MaxEmptyLinesToKeep = 2;
+  Style.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Merge;
+  verifyFormat("#pragma once\n"
+               "// test file documentation\n"
+               "#include \"a.h\"\n"
+               "#include \"b.h\"\n"
+               "#include \"c.h\"\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+}
+
+TEST_F(FormatTest, EmptyLinesAfterIncludesWithIncludesRegroup) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2;
+  Style.MaxEmptyLinesToKeep = 2;
+  Style.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Regroup;
+  verifyFormat("#pragma once\n"
+               "// test file documentation\n"
+               "#include \"a.h\"\n"
+               "#include \"c.h\"\n"
+               "\n"
+               "#include <b.h>\n"
+               "#include <d.h>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+
+  verifyFormat("#include <b.h>\n"
+               "#include <d.h>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+}
 } // namespace
 } // namespace test
 } // namespace format



More information about the cfe-commits mailing list