[clang-tools-extra] [clangd] Add config option to analyze unused system headers (PR #87208)

Vadim D. via cfe-commits cfe-commits at lists.llvm.org
Sun Mar 31 15:59:58 PDT 2024


https://github.com/vvd170501 created https://github.com/llvm/llvm-project/pull/87208

This PR adds a new `AnalyzeSystemHeaders` option to `Includes` section of clangd config. This option will allow to mark all system headers, not just the ones from standard library, as unused.

Why this change is useful?
In our codebase all includes from outside of local directory are written with angle brackets. For example,
```cpp
// foo.cpp
#include "foo.h"  // main header
#include "foo_utils.h"  // From the same directory
#include <util/generic/string.h>  // Other directory, path is relative the repository root
#include <algorithm>  // Header from standard library
```

I'd like to use the builtin include-cleaner of clangd to detect unused includes, but currently it ignores non-standard system headers

I understand that enabling unused include detection for system headers will probably produce some false positives due to lack of support for umbrella headers, but they can be filtered out by `IgnoreHeader`. This is not ideal, but still better than not having unused include detection for system headers at all.

>From f222e44f5586297ddd61ac0efff4a7115a766c53 Mon Sep 17 00:00:00 2001
From: vvd170501 <36827317+vvd170501 at users.noreply.github.com>
Date: Mon, 1 Apr 2024 01:12:11 +0300
Subject: [PATCH] Add config option to analyze unused system headers

---
 clang-tools-extra/clangd/Config.h             |  1 +
 clang-tools-extra/clangd/ConfigCompile.cpp    | 56 +++++++++++--------
 clang-tools-extra/clangd/ConfigFragment.h     |  4 ++
 clang-tools-extra/clangd/ConfigYAML.cpp       |  4 ++
 clang-tools-extra/clangd/IncludeCleaner.cpp   | 34 +++++++----
 clang-tools-extra/clangd/IncludeCleaner.h     | 13 +----
 clang-tools-extra/clangd/ParsedAST.cpp        |  3 +-
 .../clangd/unittests/ConfigCompileTests.cpp   |  6 ++
 .../clangd/unittests/ConfigYAMLTests.cpp      | 15 +++++
 .../clangd/unittests/IncludeCleanerTests.cpp  | 15 +++++
 clang-tools-extra/docs/ReleaseNotes.rst       |  4 ++
 11 files changed, 110 insertions(+), 45 deletions(-)

diff --git a/clang-tools-extra/clangd/Config.h b/clang-tools-extra/clangd/Config.h
index 4371c80a6c5877..9629854abff31b 100644
--- a/clang-tools-extra/clangd/Config.h
+++ b/clang-tools-extra/clangd/Config.h
@@ -114,6 +114,7 @@ struct Config {
     /// these regexes.
     struct {
       std::vector<std::function<bool(llvm::StringRef)>> IgnoreHeader;
+      bool AnalyzeSystemHeaders = false;
     } Includes;
   } Diagnostics;
 
diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp
index 5bb2eb4a9f803f..570f3cfc3ce2ed 100644
--- a/clang-tools-extra/clangd/ConfigCompile.cpp
+++ b/clang-tools-extra/clangd/ConfigCompile.cpp
@@ -572,32 +572,42 @@ struct FragmentCompiler {
 #else
     static llvm::Regex::RegexFlags Flags = llvm::Regex::NoFlags;
 #endif
-    auto Filters = std::make_shared<std::vector<llvm::Regex>>();
-    for (auto &HeaderPattern : F.IgnoreHeader) {
-      // Anchor on the right.
-      std::string AnchoredPattern = "(" + *HeaderPattern + ")$";
-      llvm::Regex CompiledRegex(AnchoredPattern, Flags);
-      std::string RegexError;
-      if (!CompiledRegex.isValid(RegexError)) {
-        diag(Warning,
-             llvm::formatv("Invalid regular expression '{0}': {1}",
-                           *HeaderPattern, RegexError)
-                 .str(),
-             HeaderPattern.Range);
-        continue;
+    std::shared_ptr<std::vector<llvm::Regex>> Filters;
+    if (!F.IgnoreHeader.empty()) {
+      Filters = std::make_shared<std::vector<llvm::Regex>>();
+      for (auto &HeaderPattern : F.IgnoreHeader) {
+        // Anchor on the right.
+        std::string AnchoredPattern = "(" + *HeaderPattern + ")$";
+        llvm::Regex CompiledRegex(AnchoredPattern, Flags);
+        std::string RegexError;
+        if (!CompiledRegex.isValid(RegexError)) {
+          diag(Warning,
+               llvm::formatv("Invalid regular expression '{0}': {1}",
+                             *HeaderPattern, RegexError)
+                   .str(), HeaderPattern.Range);
+          continue;
+        }
+        Filters->push_back(std::move(CompiledRegex));
       }
-      Filters->push_back(std::move(CompiledRegex));
     }
-    if (Filters->empty())
+    std::optional<bool> AnalyzeSystemHeaders;
+    if (F.AnalyzeSystemHeaders.has_value())
+      AnalyzeSystemHeaders = **F.AnalyzeSystemHeaders;
+    if (!Filters && !AnalyzeSystemHeaders.has_value())
       return;
-    auto Filter = [Filters](llvm::StringRef Path) {
-      for (auto &Regex : *Filters)
-        if (Regex.match(Path))
-          return true;
-      return false;
-    };
-    Out.Apply.push_back([Filter](const Params &, Config &C) {
-      C.Diagnostics.Includes.IgnoreHeader.emplace_back(Filter);
+    Out.Apply.push_back([Filters = std::move(Filters),
+                         AnalyzeSystemHeaders](const Params &, Config &C) {
+      if (Filters) {
+        auto Filter = [Filters](llvm::StringRef Path) {
+          for (auto &Regex : *Filters)
+            if (Regex.match(Path))
+              return true;
+          return false;
+        };
+        C.Diagnostics.Includes.IgnoreHeader.emplace_back(std::move(Filter));
+      }
+      if (AnalyzeSystemHeaders.has_value())
+        C.Diagnostics.Includes.AnalyzeSystemHeaders = *AnalyzeSystemHeaders;
     });
   }
 
diff --git a/clang-tools-extra/clangd/ConfigFragment.h b/clang-tools-extra/clangd/ConfigFragment.h
index 7fa61108c78a05..ac8b88c2454128 100644
--- a/clang-tools-extra/clangd/ConfigFragment.h
+++ b/clang-tools-extra/clangd/ConfigFragment.h
@@ -254,6 +254,10 @@ struct Fragment {
       /// unused or missing. These can match any suffix of the header file in
       /// question.
       std::vector<Located<std::string>> IgnoreHeader;
+
+      /// If false (default), unused system headers will be ignored.
+      /// Standard library headers are analyzed regardless of this option.
+      std::optional<Located<bool>> AnalyzeSystemHeaders;
     };
     IncludesBlock Includes;
 
diff --git a/clang-tools-extra/clangd/ConfigYAML.cpp b/clang-tools-extra/clangd/ConfigYAML.cpp
index ce09af819247ae..7608af42005386 100644
--- a/clang-tools-extra/clangd/ConfigYAML.cpp
+++ b/clang-tools-extra/clangd/ConfigYAML.cpp
@@ -169,6 +169,10 @@ class Parser {
       if (auto Values = scalarValues(N))
         F.IgnoreHeader = std::move(*Values);
     });
+    Dict.handle("AnalyzeSystemHeaders", [&](Node &N) {
+      if (auto Value = boolValue(N, "AnalyzeSystemHeaders"))
+        F.AnalyzeSystemHeaders = *Value;
+    });
     Dict.parse(N);
   }
 
diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp
index 8e48f546d94e77..0fdd0412d1e485 100644
--- a/clang-tools-extra/clangd/IncludeCleaner.cpp
+++ b/clang-tools-extra/clangd/IncludeCleaner.cpp
@@ -68,24 +68,32 @@ bool isIgnored(llvm::StringRef HeaderPath, HeaderFilter IgnoreHeaders) {
 }
 
 bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST,
-                       const include_cleaner::PragmaIncludes *PI) {
+                       const include_cleaner::PragmaIncludes *PI,
+                       bool AnalyzeSystemHeaders) {
   assert(Inc.HeaderID);
   auto HID = static_cast<IncludeStructure::HeaderID>(*Inc.HeaderID);
   auto FE = AST.getSourceManager().getFileManager().getFileRef(
       AST.getIncludeStructure().getRealPath(HID));
   assert(FE);
   if (FE->getDir() == AST.getPreprocessor()
-                  .getHeaderSearchInfo()
-                  .getModuleMap()
-                  .getBuiltinDir()) 
+                          .getHeaderSearchInfo()
+                          .getModuleMap()
+                          .getBuiltinDir())
     return false;
   if (PI && PI->shouldKeep(*FE))
     return false;
   // FIXME(kirillbobyrev): We currently do not support the umbrella headers.
   // System headers are likely to be standard library headers.
-  // Until we have good support for umbrella headers, don't warn about them.
-  if (Inc.Written.front() == '<')
-    return tooling::stdlib::Header::named(Inc.Written).has_value();
+  // Until we have good support for umbrella headers, don't warn about them
+  // (unless analysis is explicitly enabled).
+  if (Inc.Written.front() == '<') {
+    if (tooling::stdlib::Header::named(Inc.Written)) {
+      return true;
+    }
+    if (!AnalyzeSystemHeaders) {
+      return false;
+    }
+  }
   if (PI) {
     // Check if main file is the public interface for a private header. If so we
     // shouldn't diagnose it as unused.
@@ -266,7 +274,8 @@ Fix fixAll(const Fix &RemoveAllUnused, const Fix &AddAllMissing) {
 
 std::vector<const Inclusion *>
 getUnused(ParsedAST &AST,
-          const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles) {
+          const llvm::DenseSet<IncludeStructure::HeaderID> &ReferencedFiles,
+          bool AnalyzeSystemHeaders) {
   trace::Span Tracer("IncludeCleaner::getUnused");
   std::vector<const Inclusion *> Unused;
   for (const Inclusion &MFI : AST.getIncludeStructure().MainFileIncludes) {
@@ -275,7 +284,8 @@ getUnused(ParsedAST &AST,
     auto IncludeID = static_cast<IncludeStructure::HeaderID>(*MFI.HeaderID);
     if (ReferencedFiles.contains(IncludeID))
       continue;
-    if (!mayConsiderUnused(MFI, AST, &AST.getPragmaIncludes())) {
+    if (!mayConsiderUnused(MFI, AST, &AST.getPragmaIncludes(),
+                           AnalyzeSystemHeaders)) {
       dlog("{0} was not used, but is not eligible to be diagnosed as unused",
            MFI.Written);
       continue;
@@ -347,7 +357,8 @@ include_cleaner::Includes convertIncludes(const ParsedAST &AST) {
   return ConvertedIncludes;
 }
 
-IncludeCleanerFindings computeIncludeCleanerFindings(ParsedAST &AST) {
+IncludeCleanerFindings
+computeIncludeCleanerFindings(ParsedAST &AST, bool AnalyzeSystemHeaders) {
   // Interaction is only polished for C/CPP.
   if (AST.getLangOpts().ObjC)
     return {};
@@ -432,7 +443,8 @@ IncludeCleanerFindings computeIncludeCleanerFindings(ParsedAST &AST) {
            MapInfo::getHashValue(RHS.Symbol);
   });
   MissingIncludes.erase(llvm::unique(MissingIncludes), MissingIncludes.end());
-  std::vector<const Inclusion *> UnusedIncludes = getUnused(AST, Used);
+  std::vector<const Inclusion *> UnusedIncludes =
+      getUnused(AST, Used, AnalyzeSystemHeaders);
   return {std::move(UnusedIncludes), std::move(MissingIncludes)};
 }
 
diff --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h
index 387763de340767..d03d74038b483b 100644
--- a/clang-tools-extra/clangd/IncludeCleaner.h
+++ b/clang-tools-extra/clangd/IncludeCleaner.h
@@ -53,7 +53,9 @@ struct IncludeCleanerFindings {
   std::vector<MissingIncludeDiagInfo> MissingIncludes;
 };
 
-IncludeCleanerFindings computeIncludeCleanerFindings(ParsedAST &AST);
+IncludeCleanerFindings
+computeIncludeCleanerFindings(ParsedAST &AST,
+                              bool AnalyzeSystemHeaders = false);
 
 using HeaderFilter = llvm::ArrayRef<std::function<bool(llvm::StringRef)>>;
 std::vector<Diag>
@@ -62,15 +64,6 @@ issueIncludeCleanerDiagnostics(ParsedAST &AST, llvm::StringRef Code,
                                const ThreadsafeFS &TFS,
                                HeaderFilter IgnoreHeader = {});
 
-/// Affects whether standard library includes should be considered for
-/// removal. This is off by default for now due to implementation limitations:
-/// - macros are not tracked
-/// - symbol names without a unique associated header are not tracked
-/// - references to std-namespaced C types are not properly tracked:
-///   instead of std::size_t -> <cstddef> we see ::size_t -> <stddef.h>
-/// FIXME: remove this hack once the implementation is good enough.
-void setIncludeCleanerAnalyzesStdlib(bool B);
-
 /// Converts the clangd include representation to include-cleaner
 /// include representation.
 include_cleaner::Includes convertIncludes(const ParsedAST &);
diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp
index 3ff759415f7c8b..8073b1ee76e643 100644
--- a/clang-tools-extra/clangd/ParsedAST.cpp
+++ b/clang-tools-extra/clangd/ParsedAST.cpp
@@ -373,7 +373,8 @@ std::vector<Diag> getIncludeCleanerDiags(ParsedAST &AST, llvm::StringRef Code,
       Cfg.Diagnostics.UnusedIncludes == Config::IncludesPolicy::None;
   if (SuppressMissing && SuppressUnused)
     return {};
-  auto Findings = computeIncludeCleanerFindings(AST);
+  auto Findings = computeIncludeCleanerFindings(
+      AST, Cfg.Diagnostics.Includes.AnalyzeSystemHeaders);
   if (SuppressMissing)
     Findings.MissingIncludes.clear();
   if (SuppressUnused)
diff --git a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp
index f0ffc429c0ca90..20d873525fd163 100644
--- a/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp
@@ -277,6 +277,12 @@ TEST_F(ConfigCompileTests, DiagnosticsIncludeCleaner) {
   };
   EXPECT_TRUE(HeaderFilter("foo.h"));
   EXPECT_FALSE(HeaderFilter("bar.h"));
+
+  Frag = {};
+  EXPECT_FALSE(Conf.Diagnostics.Includes.AnalyzeSystemHeaders);
+  Frag.Diagnostics.Includes.AnalyzeSystemHeaders = true;
+  EXPECT_TRUE(compileAndApply());
+  EXPECT_TRUE(Conf.Diagnostics.Includes.AnalyzeSystemHeaders);
 }
 
 TEST_F(ConfigCompileTests, DiagnosticSuppression) {
diff --git a/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp b/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
index 44a6647d4c0a81..162db1fa34125d 100644
--- a/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp
@@ -278,6 +278,21 @@ TEST(ParseYAML, IncludesIgnoreHeader) {
               ElementsAre(val("foo"), val("bar")));
 }
 
+TEST(ParseYAML, IncludesAnalyzeSystemHeaders) {
+  CapturedDiags Diags;
+  Annotations YAML(R"yaml(
+Diagnostics:
+  Includes:
+    AnalyzeSystemHeaders: true
+  )yaml");
+  auto Results =
+      Fragment::parseYAML(YAML.code(), "config.yaml", Diags.callback());
+  ASSERT_THAT(Diags.Diagnostics, IsEmpty());
+  ASSERT_EQ(Results.size(), 1u);
+  EXPECT_THAT(Results[0].Diagnostics.Includes.AnalyzeSystemHeaders,
+              llvm::ValueIs(val(true)));
+}
+
 TEST(ParseYAML, Style) {
   CapturedDiags Diags;
   Annotations YAML(R"yaml(
diff --git a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
index 142310837bd9ce..104c42779383ef 100644
--- a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
+++ b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
@@ -135,6 +135,21 @@ TEST(IncludeCleaner, GetUnusedHeaders) {
                            Pointee(writtenInclusion("\"dir/unused.h\""))));
 }
 
+TEST(IncludeCleaner, SystemUnusedHeaders) {
+  auto TU = TestTU::withCode(R"cpp(
+    #include <system_header.h>
+    #include <system_unused.h>
+    SystemClass x;
+  )cpp");
+  TU.AdditionalFiles["system/system_header.h"] = guard("class SystemClass {};");
+  TU.AdditionalFiles["system/system_unused.h"] = guard("");
+  TU.ExtraArgs = {"-isystem", testPath("system")};
+  auto AST = TU.build();
+  IncludeCleanerFindings Findings = computeIncludeCleanerFindings(AST, true);
+  EXPECT_THAT(Findings.UnusedIncludes,
+              ElementsAre(Pointee(writtenInclusion("<system_unused.h>"))));
+}
+
 TEST(IncludeCleaner, ComputeMissingHeaders) {
   Annotations MainFile(R"cpp(
     #include "a.h"
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 78b09d23d4427f..1bd395d95250db 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -81,6 +81,10 @@ Objective-C
 Miscellaneous
 ^^^^^^^^^^^^^
 
+- Added config option `Includes.AnalyzeSystemHeaders` to enable unused includes
+  detection for all system headers. At this moment umbrella headers are not supported,
+  so enabling this option may result in false-positives.
+
 Improvements to clang-doc
 -------------------------
 



More information about the cfe-commits mailing list