[clang] [clang-format] Add options to set number of empty lines after includes (PR #78957)

via cfe-commits cfe-commits at lists.llvm.org
Mon Jan 22 03:56:07 PST 2024


https://github.com/seranu updated https://github.com/llvm/llvm-project/pull/78957

>From 501cd2230cc878189e2dde912bd659faf711a2db Mon Sep 17 00:00:00 2001
From: Serban Ungureanu <serban.ungureanu at randstaddigital.com>
Date: Sat, 20 Jan 2024 17:02:04 +0200
Subject: [PATCH] [clang-format] Add options to set number of empty lines after
 includes

---
 clang/docs/ClangFormatStyleOptions.rst     |  19 +++
 clang/docs/ReleaseNotes.rst                |   1 +
 clang/include/clang/Format/Format.h        |  18 ++
 clang/lib/Format/CMakeLists.txt            |   1 +
 clang/lib/Format/Format.cpp                |   9 +
 clang/lib/Format/IncludesSeparator.cpp     | 159 ++++++++++++++++++
 clang/lib/Format/IncludesSeparator.h       |  42 +++++
 clang/lib/Format/TokenAnnotator.h          |   8 +
 clang/unittests/Format/ConfigParseTest.cpp |   2 +
 clang/unittests/Format/FormatTest.cpp      | 185 +++++++++++++++++++++
 10 files changed, 444 insertions(+)
 create mode 100644 clang/lib/Format/IncludesSeparator.cpp
 create mode 100644 clang/lib/Format/IncludesSeparator.h

diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index 4dc0de3a90f2650..4ba38808cd5090a 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -3220,6 +3220,25 @@ the configuration (without a prefix: ``Auto``).
 
 
 
+.. _EmptyLinesAfterIncludes:
+
+**EmptyLinesAfterIncludes** (``Unsigned``) :versionbadge:`clang-format 18` :ref:`¶ <EmptyLinesAfterIncludes>`
+  Number of lines after each include area. An include area is
+  a list of consecutive include statements. The include area may be
+  composed of multiple include blocks.
+  Limited by MaxEmptyLinesToKeep.
+  Example:
+
+  .. code-block:: c++
+
+
+     EmptyLinesAfterIncludes: 1  vs.  EmptyLinesAfterIncludes: 2
+     #include <string>                #include <string>
+     #include <map>                   #include <map>
+
+     class Test {};
+                                      class Test {};
+
 .. _ExperimentalAutoDetectBinPacking:
 
 **ExperimentalAutoDetectBinPacking** (``Boolean``) :versionbadge:`clang-format 3.7` :ref:`¶ <ExperimentalAutoDetectBinPacking>`
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4888ffe6f4dfc85..e1bf86c8a83b671 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -1260,6 +1260,7 @@ clang-format
 - Add ``.clang-format-ignore`` files.
 - Add ``AlignFunctionPointers`` sub-option for ``AlignConsecutiveDeclarations``.
 - Add ``SkipMacroDefinitionBody`` option.
+- Add ``EmptyLinesAfterIncludes`` option.
 
 libclang
 --------
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index bc9eecd42f9ebfd..84d1a0b70b9efd2 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -2459,6 +2459,23 @@ struct FormatStyle {
   /// \version 12
   EmptyLineBeforeAccessModifierStyle EmptyLineBeforeAccessModifier;
 
+  /// \brief Number of lines after each include area. An include area is
+  /// a list of consecutive include statements. The include area may be
+  /// composed of multiple include blocks.
+  /// Limited by MaxEmptyLinesToKeep.
+  /// Example:
+  /// \code
+  ///
+  ///    EmptyLinesAfterIncludes: 1  vs.  EmptyLinesAfterIncludes: 2
+  ///    #include <string>                #include <string>
+  ///    #include <map>                   #include <map>
+  ///
+  ///    class Test {};
+  ///                                     class Test {};
+  /// \endcode
+  /// \version 18
+  std::optional<unsigned> EmptyLinesAfterIncludes;
+
   /// If ``true``, clang-format detects whether function calls and
   /// definitions are formatted with one parameter per line.
   ///
@@ -4831,6 +4848,7 @@ struct FormatStyle {
            DerivePointerAlignment == R.DerivePointerAlignment &&
            DisableFormat == R.DisableFormat &&
            EmptyLineAfterAccessModifier == R.EmptyLineAfterAccessModifier &&
+           EmptyLinesAfterIncludes == R.EmptyLinesAfterIncludes &&
            EmptyLineBeforeAccessModifier == R.EmptyLineBeforeAccessModifier &&
            ExperimentalAutoDetectBinPacking ==
                R.ExperimentalAutoDetectBinPacking &&
diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt
index 84a3c136f650a85..ff3860426407adc 100644
--- a/clang/lib/Format/CMakeLists.txt
+++ b/clang/lib/Format/CMakeLists.txt
@@ -8,6 +8,7 @@ add_clang_library(clangFormat
   Format.cpp
   FormatToken.cpp
   FormatTokenLexer.cpp
+  IncludesSeparator.cpp
   IntegerLiteralSeparatorFixer.cpp
   MacroCallReconstructor.cpp
   MacroExpander.cpp
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index ff326dc784783b2..f068da97e6dfbff 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -20,6 +20,7 @@
 #include "FormatInternal.h"
 #include "FormatToken.h"
 #include "FormatTokenLexer.h"
+#include "IncludesSeparator.h"
 #include "IntegerLiteralSeparatorFixer.h"
 #include "NamespaceEndCommentsFixer.h"
 #include "ObjCPropertyAttributeOrderFixer.h"
@@ -995,6 +996,7 @@ template <> struct MappingTraits<FormatStyle> {
     IO.mapOptional("DisableFormat", Style.DisableFormat);
     IO.mapOptional("EmptyLineAfterAccessModifier",
                    Style.EmptyLineAfterAccessModifier);
+    IO.mapOptional("EmptyLinesAfterIncludes", Style.EmptyLinesAfterIncludes);
     IO.mapOptional("EmptyLineBeforeAccessModifier",
                    Style.EmptyLineBeforeAccessModifier);
     IO.mapOptional("ExperimentalAutoDetectBinPacking",
@@ -1502,6 +1504,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) {
   LLVMStyle.DerivePointerAlignment = false;
   LLVMStyle.DisableFormat = false;
   LLVMStyle.EmptyLineAfterAccessModifier = FormatStyle::ELAAMS_Never;
+  LLVMStyle.EmptyLinesAfterIncludes = std::nullopt;
   LLVMStyle.EmptyLineBeforeAccessModifier = FormatStyle::ELBAMS_LogicalBlock;
   LLVMStyle.ExperimentalAutoDetectBinPacking = false;
   LLVMStyle.FixNamespaceComments = true;
@@ -3715,6 +3718,12 @@ reformat(const FormatStyle &Style, StringRef Code,
     });
   }
 
+  if (Style.EmptyLinesAfterIncludes.has_value()) {
+    Passes.emplace_back([&](const Environment &Env) {
+      return IncludesSeparator(Env, Expanded).process();
+    });
+  }
+
   if (Style.Language == FormatStyle::LK_ObjC &&
       !Style.ObjCPropertyAttributeOrder.empty()) {
     Passes.emplace_back([&](const Environment &Env) {
diff --git a/clang/lib/Format/IncludesSeparator.cpp b/clang/lib/Format/IncludesSeparator.cpp
new file mode 100644
index 000000000000000..0e74383c7c5b78f
--- /dev/null
+++ b/clang/lib/Format/IncludesSeparator.cpp
@@ -0,0 +1,159 @@
+//===--- IncludesSeparator.cpp ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements IncludesSeparator, a TokenAnalyzer that inserts
+/// new lines or removes empty lines after an include area.
+/// An include area is a list of consecutive include statements.
+///
+//===----------------------------------------------------------------------===//
+
+#include "IncludesSeparator.h"
+#include "TokenAnnotator.h"
+#define DEBUG_TYPE "includes-separator"
+
+namespace {
+// Returns true if \p Line is a '#if', '#ifdef' or '#ifndef' directive.
+bool isConditionalCompilationStart(const clang::format::AnnotatedLine &Line) {
+  if (!Line.First || !Line.First->is(clang::tok::hash))
+    return false;
+  const auto *Next = Line.First->getNextNonComment();
+  return Next && Next->isOneOf(clang::tok::pp_if, clang::tok::pp_ifdef,
+                               clang::tok::pp_ifndef);
+}
+
+// Returns true if \p Line is an '#endif' directive.
+bool isConditionalCompilationEnd(const clang::format::AnnotatedLine &Line) {
+  if (!Line.First || !Line.First->is(clang::tok::hash))
+    return false;
+  const auto *Next = Line.First->getNextNonComment();
+  return Next && Next->is(clang::tok::pp_endif);
+}
+
+// Returns true if \p Line is any conditional compilation directive: '#if',
+// '#ifdef', '#ifndef', '#elif', '#elifdef', '#elifndef', '#else' or '#endif'.
+bool isConditionalCompilationStatement(
+    const clang::format::AnnotatedLine &Line) {
+  if (!Line.First || !Line.First->is(clang::tok::hash))
+    return false;
+  const auto *Next = Line.First->getNextNonComment();
+  return Next && Next->isOneOf(clang::tok::pp_if, clang::tok::pp_ifdef,
+                               clang::tok::pp_ifndef, clang::tok::pp_elif,
+                               clang::tok::pp_elifdef, clang::tok::pp_elifndef,
+                               clang::tok::pp_else, clang::tok::pp_endif);
+}
+
+// Returns true if the conditional compilation block opened at \p StartIdx
+// contains only include lines and conditional compilation directives. Nested
+// blocks are followed with a depth counter.
+bool isCCOnlyWithIncludes(
+    const llvm::SmallVectorImpl<clang::format::AnnotatedLine *> &Lines,
+    unsigned StartIdx) {
+  int Depth = 0;
+  for (unsigned Idx = StartIdx; Idx < Lines.size(); ++Idx) {
+    const clang::format::AnnotatedLine &Line = *Lines[Idx];
+    if (isConditionalCompilationStart(Line))
+      ++Depth;
+    if (isConditionalCompilationEnd(Line))
+      --Depth;
+    // Depth is back to zero on the '#endif' matching the opening directive.
+    if (Depth == 0)
+      return true;
+    // Any other kind of line disqualifies the block.
+    if (!Line.isInclude() && !isConditionalCompilationStatement(Line))
+      return false;
+  }
+  return true;
+}
+
+// Returns the index of the line that closes the conditional compilation block
+// opened at \p StartIdx; if it is never closed, the scan ends at Lines.size().
+unsigned getEndOfCCBlock(
+    const llvm::SmallVectorImpl<clang::format::AnnotatedLine *> &Lines,
+    unsigned StartIdx) {
+  int Depth = 0;
+  unsigned Idx = StartIdx;
+  while (Idx < Lines.size()) {
+    const clang::format::AnnotatedLine &Line = *Lines[Idx];
+    Depth += isConditionalCompilationStart(Line);
+    Depth -= isConditionalCompilationEnd(Line);
+    // Stop on the line that brings the nesting depth back to zero.
+    if (Depth == 0)
+      break;
+    ++Idx;
+  }
+  return Idx;
+}
+} // namespace
+
+namespace clang {
+namespace format {
+std::pair<tooling::Replacements, unsigned>
+IncludesSeparator::analyze(TokenAnnotator &Annotator,
+                           SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+                           FormatTokenLexer &Tokens) {
+  assert(Style.EmptyLinesAfterIncludes.has_value()); // Dereferenced later.
+  AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
+  tooling::Replacements Fixes; // Filled in by separateIncludes().
+  separateIncludes(AnnotatedLines, Fixes, Tokens);
+  return {Fixes, 0};
+}
+
+// Walks \p Lines, tracking whether the previous lines formed an include area,
+// and queues a whitespace replacement on the first line that follows it.
+void IncludesSeparator::separateIncludes(
+    SmallVectorImpl<AnnotatedLine *> &Lines, tooling::Replacements &Result,
+    FormatTokenLexer &Tokens) {
+  // N empty lines take N + 1 newlines; N is capped by MaxEmptyLinesToKeep.
+  const unsigned Newlines =
+      1 + std::min(Style.MaxEmptyLinesToKeep, *Style.EmptyLinesAfterIncludes);
+  const auto &SM = Env.getSourceManager();
+  const bool UseCRLF =
+      Style.LineEnding > FormatStyle::LE_CRLF
+          ? WhitespaceManager::inputUsesCRLF(
+                SM.getBufferData(Env.getFileID()),
+                Style.LineEnding == FormatStyle::LE_DeriveCRLF)
+          : Style.LineEnding == FormatStyle::LE_CRLF;
+  WhitespaceManager Whitespaces(SM, Style, UseCRLF);
+
+  bool InIncludeArea = false;
+  for (unsigned Idx = 0; Idx < Lines.size(); ++Idx) {
+    const AnnotatedLine &Line = *Lines[Idx];
+    if (!InIncludeArea) {
+      // An include line opens a new include area.
+      InIncludeArea = Line.isInclude();
+      continue;
+    }
+    if (Line.isInclude())
+      continue;
+    if (isConditionalCompilationStart(Line)) {
+      const bool OnlyIncludes = isCCOnlyWithIncludes(Lines, Idx);
+      Idx = getEndOfCCBlock(Lines, Idx);
+      // A conditional compilation block holding nothing but includes is
+      // treated as part of the include area.
+      if (OnlyIncludes)
+        continue;
+    }
+    // \c Line is the first line after the include area.
+    auto *First = Line.First;
+    if (!First->is(tok::eof) && Line.Affected) {
+      Whitespaces.replaceWhitespace(*First, Newlines, First->OriginalColumn,
+                                    First->OriginalColumn);
+    }
+    InIncludeArea = false;
+  }
+
+  for (const auto &R : Whitespaces.generateReplacements()) {
+    // Result.add() returns an Error that converts to true on failure, so stop
+    // at the first replacement that cannot be added.
+    if (Result.add(R))
+      return;
+  }
+}
+} // namespace format
+} // namespace clang
diff --git a/clang/lib/Format/IncludesSeparator.h b/clang/lib/Format/IncludesSeparator.h
new file mode 100644
index 000000000000000..d093e24dbf9d412
--- /dev/null
+++ b/clang/lib/Format/IncludesSeparator.h
@@ -0,0 +1,42 @@
+//===--- IncludesSeparator.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares IncludesSeparator, a TokenAnalyzer that inserts
+/// new lines or removes empty lines after an include area.
+/// An include area is a list of consecutive include statements.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_FORMAT_INCLUDESSEPARATOR_H
+#define LLVM_CLANG_LIB_FORMAT_INCLUDESSEPARATOR_H
+
+#include "TokenAnalyzer.h"
+#include "WhitespaceManager.h"
+
+namespace clang {
+namespace format {
+/// TokenAnalyzer that adjusts the empty lines following an include area.
+class IncludesSeparator : public TokenAnalyzer {
+public:
+  IncludesSeparator(const Environment &Env, const FormatStyle &Style)
+      : TokenAnalyzer(Env, Style) {}
+  std::pair<tooling::Replacements, unsigned>
+  analyze(TokenAnnotator &Annotator,
+          SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+          FormatTokenLexer &Tokens) override;
+
+private:
+  void separateIncludes(SmallVectorImpl<AnnotatedLine *> &Lines,
+                        tooling::Replacements &Result,
+                        FormatTokenLexer &Tokens);
+};
+} // namespace format
+} // namespace clang
+
+#endif
diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h
index 05a6daa87d80340..06486799ec4031a 100644
--- a/clang/lib/Format/TokenAnnotator.h
+++ b/clang/lib/Format/TokenAnnotator.h
@@ -113,6 +113,14 @@ class AnnotatedLine {
     return First && First->is(tok::comment) && !First->getNextNonComment();
   }
 
+  /// \c true if this line is an \c #include directive, ignoring comments.
+  bool isInclude() const {
+    if (!First || !First->is(tok::hash))
+      return false;
+    const auto *Directive = First->getNextNonComment();
+    return Directive && Directive->is(tok::pp_include);
+  }
+
   /// \c true if this line starts with the given tokens in order, ignoring
   /// comments.
   template <typename... Ts> bool startsWith(Ts... Tokens) const {
diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp
index 2a8d79359a49b40..cbb442203b1628a 100644
--- a/clang/unittests/Format/ConfigParseTest.cpp
+++ b/clang/unittests/Format/ConfigParseTest.cpp
@@ -1005,6 +1005,8 @@ TEST(ConfigParseTest, ParsesConfiguration) {
               FormatStyle::SDS_Leave);
   CHECK_PARSE("SeparateDefinitionBlocks: Never", SeparateDefinitionBlocks,
               FormatStyle::SDS_Never);
+
+  CHECK_PARSE("EmptyLinesAfterIncludes: 2", EmptyLinesAfterIncludes, 2);
 }
 
 TEST(ConfigParseTest, ParsesConfigurationWithLanguages) {
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 3fb55ae2c1f4137..c2a280f003c70f9 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -26990,6 +26990,191 @@ TEST_F(FormatTest, BreakAdjacentStringLiterals) {
   Style.BreakAdjacentStringLiterals = false;
   verifyFormat(Code, Style);
 }
+
+TEST_F(FormatTest, EmptyLinesAfterInclude) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2;
+  Style.MaxEmptyLinesToKeep = 2;
+  // A lone include is followed by exactly two empty lines.
+  verifyFormat("#include <string>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+  // A non-include directive (#define) ends the include area.
+  verifyFormat("#ifndef TEST_H\n"
+               "#define TEST_H\n"
+               "#include <string>\n"
+               "\n"
+               "\n"
+               "#define PP_DEFINE\n"
+               "#include <map>\n"
+               "#include <vector>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+  // Nested conditional blocks holding only includes stay inside the area.
+  verifyFormat("#include <string>\n"
+               "#ifdef WINDOWS\n"
+               "#include <win32>\n"
+               "#ifdef X32\n"
+               "#include <additionalHeader>\n"
+               "#else\n"
+               "#include <unistd.h>\n"
+               "#endif\n"
+               "#endif\n"
+               "#include <map>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+  // Same with a leading #pragma once and a single conditional block.
+  verifyFormat("#pragma once\n"
+               "#include <string>\n"
+               "#include <map>\n"
+               "#ifdef WINDOWS\n"
+               "#include <w32>\n"
+               "#endif\n"
+               "#include <vector>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+  // A conditional block with a non-include line ends the include area.
+  verifyFormat("#include <string>\n"
+               "\n"
+               "\n"
+               "#ifdef WINDOWS\n"
+               "#define OS_VERSION WINDOWS\n"
+               "#endif\n"
+               "\n"
+               "#include <map>\n"
+               "#include <vector>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+  // A #define whose body mentions #include is not an include line.
+  verifyFormat("#include <string>\n"
+               "#include <map>\n"
+               "\n"
+               "\n"
+               "#define INCLUDE_MACRO #include<vector> #include<set>\n"
+               "#include <queue>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+  // A block mixing a nested include with a #define is not include-only.
+  verifyFormat("#include <string>\n"
+               "\n"
+               "\n"
+               "#ifdef WINDOWS\n"
+               "#ifdef x86\n"
+               "#include <x86_windows>\n"
+               "#endif\n"
+               "#define OS_VERSION WINDOWS\n"
+               "#endif\n"
+               "\n"
+               "#include <map>\n"
+               "#include <vector>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+  // From here on a single empty line is requested.
+  Style.EmptyLinesAfterIncludes = 1;
+  verifyFormat("#include <string>\n"
+               "\n"
+               "class Test {};",
+               Style);
+  // Same after a leading #pragma once.
+  verifyFormat("#pragma once\n"
+               "#include <string>\n"
+               "\n"
+               "class Test {};",
+               Style);
+  // Include area inside an include guard.
+  verifyFormat("#ifndef TEST_H\n"
+               "#define TEST_H\n"
+               "#include <string>\n"
+               "#include <map>\n"
+               "\n"
+               "void func();\n"
+               "#endif // TEST_H",
+               Style);
+}
+
+TEST_F(FormatTest, EmptyLinesAfterIncludesLimitedByMaxEmptyLinesToKeep) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2; // Two empty lines are requested...
+  Style.MaxEmptyLinesToKeep = 1;     // ...but the cap of one applies.
+  verifyFormat("#include <string>\n"
+               "\n"
+               "class Test {};",
+               Style);
+}
+
+TEST_F(FormatTest, EmptyLinesAfterIncludesWithIncludesPreserve) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2; // Two empty lines end the include area,
+  Style.MaxEmptyLinesToKeep = 2;     // while the gap between blocks stays one.
+  Style.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Preserve;
+  verifyFormat("#pragma once\n"
+               "// test file documentation\n"
+               "#include \"b.h\"\n"
+               "#include \"d.h\"\n"
+               "\n"
+               "#include \"a.h\"\n"
+               "#include \"c.h\"\n"
+               "#include \"e.h\"\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+}
+
+TEST_F(FormatTest, EmptyLinesAfterIncludesWithIncludesMerge) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2; // Expect two empty lines after the
+  Style.MaxEmptyLinesToKeep = 2;     // single include block below.
+  Style.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Merge;
+  verifyFormat("#pragma once\n"
+               "// test file documentation\n"
+               "#include \"a.h\"\n"
+               "#include \"b.h\"\n"
+               "#include \"c.h\"\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+}
+
+TEST_F(FormatTest, EmptyLinesAfterIncludesWithIncludesRegroup) {
+  auto Style = getLLVMStyle();
+  Style.EmptyLinesAfterIncludes = 2; // Two empty lines follow the last block;
+  Style.MaxEmptyLinesToKeep = 2;     // the gap between blocks stays at one.
+  Style.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Regroup;
+  verifyFormat("#pragma once\n"
+               "// test file documentation\n"
+               "#include \"a.h\"\n"
+               "#include \"c.h\"\n"
+               "\n"
+               "#include <b.h>\n"
+               "#include <d.h>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+  // A lone block of angle-bracket includes.
+  verifyFormat("#include <b.h>\n"
+               "#include <d.h>\n"
+               "\n"
+               "\n"
+               "class Test {};",
+               Style);
+}
 } // namespace
 } // namespace test
 } // namespace format



More information about the cfe-commits mailing list