[llvm-branch-commits] [clang-tools-extra] [clang-tidy] add fragment support to `misc-include-cleaner` (PR #196767)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat May 9 15:46:01 PDT 2026
llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-tools-extra
@llvm/pr-subscribers-clang-tidy
Author: Daniil Dudkin (unterumarmung)
<details>
<summary>Changes</summary>
---
Patch is 27.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/196767.diff
5 Files Affected:
- (modified) clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp (+97-85)
- (modified) clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h (+6-1)
- (modified) clang-tools-extra/docs/ReleaseNotes.rst (+6-1)
- (modified) clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst (+46-1)
- (modified) clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp (+323)
``````````diff
diff --git a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp
index 0097ea54ba548..28e51add92a38 100644
--- a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp
@@ -38,6 +38,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Regex.h"
+#include <functional>
#include <optional>
#include <string>
#include <vector>
@@ -46,18 +47,21 @@ using namespace clang::ast_matchers;
namespace clang::tidy::misc {
-namespace {
-struct MissingIncludeInfo {
- include_cleaner::SymbolReference SymRef;
- include_cleaner::Header Missing;
-};
-} // namespace
+static bool matchesAnyRegex(llvm::ArrayRef<llvm::Regex> Regexes,
+ llvm::StringRef Path) {
+ return llvm::any_of(Regexes,
+ [&](const llvm::Regex &R) { return R.match(Path); });
+}
IncludeCleanerCheck::IncludeCleanerCheck(StringRef Name,
ClangTidyContext *Context)
: ClangTidyCheck(Name, Context),
IgnoreHeaders(
utils::options::parseStringList(Options.get("IgnoreHeaders", ""))),
+ FragmentHeaderPatterns(
+ utils::options::parseStringList(Options.get("FragmentHeaders", ""))),
+ FragmentDependencyCommentFormat(
+ Options.get("FragmentDependencyCommentFormat", "")),
DeduplicateFindings(Options.get("DeduplicateFindings", true)),
UnusedIncludes(Options.get("UnusedIncludes", true)),
MissingIncludes(Options.get("MissingIncludes", true)) {
@@ -69,6 +73,16 @@ IncludeCleanerCheck::IncludeCleanerCheck(StringRef Name,
HeaderSuffix += '$';
IgnoreHeadersRegex.emplace_back(HeaderSuffix);
}
+ for (const StringRef Pattern : FragmentHeaderPatterns) {
+ llvm::Regex CompiledRegex(Pattern);
+ std::string RegexError;
+ if (!CompiledRegex.isValid(RegexError)) {
+ configurationDiag("Invalid fragment headers regular expression '%0': %1")
+ << Pattern << RegexError;
+ continue;
+ }
+ FragmentHeaderRegexes.push_back(std::move(CompiledRegex));
+ }
if (UnusedIncludes == false && MissingIncludes == false)
this->configurationDiag("The check 'misc-include-cleaner' will not "
@@ -79,6 +93,10 @@ IncludeCleanerCheck::IncludeCleanerCheck(StringRef Name,
void IncludeCleanerCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
Options.store(Opts, "IgnoreHeaders",
utils::options::serializeStringList(IgnoreHeaders));
+ Options.store(Opts, "FragmentHeaders",
+ utils::options::serializeStringList(FragmentHeaderPatterns));
+ Options.store(Opts, "FragmentDependencyCommentFormat",
+ FragmentDependencyCommentFormat);
Options.store(Opts, "DeduplicateFindings", DeduplicateFindings);
Options.store(Opts, "UnusedIncludes", UnusedIncludes);
Options.store(Opts, "MissingIncludes", MissingIncludes);
@@ -121,84 +139,23 @@ bool IncludeCleanerCheck::shouldIgnore(const include_cleaner::Header &H) {
void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) {
const SourceManager *SM = Result.SourceManager;
const FileEntry *MainFile = SM->getFileEntryForID(SM->getMainFileID());
- llvm::DenseSet<const include_cleaner::Include *> Used;
- std::vector<MissingIncludeInfo> Missing;
- SmallVector<Decl *> MainFileDecls;
+ llvm::SmallVector<Decl *> RootDecls;
for (Decl *D : Result.Nodes.getNodeAs<TranslationUnitDecl>("top")->decls()) {
- if (!SM->isWrittenInMainFile(SM->getExpansionLoc(D->getLocation())))
- continue;
// FIXME: Filter out implicit template specializations.
- MainFileDecls.push_back(D);
+ RootDecls.push_back(D);
}
- llvm::DenseSet<include_cleaner::Symbol> SeenSymbols;
- OptionalDirectoryEntryRef ResourceDir =
- PP->getHeaderSearchInfo().getModuleMap().getBuiltinDir();
- // FIXME: Find a way to have less code duplication between include-cleaner
- // analysis implementation and the below code.
- walkUsed(MainFileDecls, RecordedPreprocessor.MacroReferences, &RecordedPI,
- *PP,
- [&](const include_cleaner::SymbolReference &Ref,
- llvm::ArrayRef<include_cleaner::Header> Providers) {
- // Process each symbol once to reduce noise in the findings.
- // Tidy checks are used in two different workflows:
- // - Ones that show all the findings for a given file. For such
- // workflows there is not much point in showing all the occurences,
- // as one is enough to indicate the issue.
- // - Ones that show only the findings on changed pieces. For such
- // workflows it's useful to show findings on every reference of a
- // symbol as otherwise tools might give incosistent results
- // depending on the parts of the file being edited. But it should
- // still help surface findings for "new violations" (i.e.
- // dependency did not exist in the code at all before).
- if (DeduplicateFindings && !SeenSymbols.insert(Ref.Target).second)
- return;
- bool Satisfied = false;
- for (const include_cleaner::Header &H : Providers) {
- if (H.kind() == include_cleaner::Header::Physical &&
- (H.physical() == MainFile ||
- H.physical().getDir() == ResourceDir)) {
- Satisfied = true;
- continue;
- }
-
- for (const include_cleaner::Include *I :
- RecordedPreprocessor.Includes.match(H)) {
- Used.insert(I);
- Satisfied = true;
- }
- }
- if (!Satisfied && !Providers.empty() &&
- Ref.RT == include_cleaner::RefType::Explicit &&
- !shouldIgnore(Providers.front()))
- Missing.push_back({Ref, Providers.front()});
- });
-
- std::vector<const include_cleaner::Include *> Unused;
- for (const include_cleaner::Include &I :
- RecordedPreprocessor.Includes.all()) {
- if (Used.contains(&I) || !I.Resolved || I.Resolved->getDir() == ResourceDir)
- continue;
- if (RecordedPI.shouldKeep(*I.Resolved))
- continue;
- // Check if main file is the public interface for a private header. If so
- // we shouldn't diagnose it as unused.
- if (auto PHeader = RecordedPI.getPublic(*I.Resolved); !PHeader.empty()) {
- PHeader = PHeader.trim("<>\"");
- // Since most private -> public mappings happen in a verbatim way, we
- // check textually here. This might go wrong in presence of symlinks or
- // header mappings. But that's not different than rest of the places.
- if (getCurrentMainFile().ends_with(PHeader))
- continue;
- }
- auto StdHeader = tooling::stdlib::Header::named(
- I.quote(), PP->getLangOpts().CPlusPlus ? tooling::stdlib::Lang::CXX
- : tooling::stdlib::Lang::C);
- if (StdHeader && shouldIgnore(*StdHeader))
- continue;
- if (shouldIgnore(*I.Resolved))
- continue;
- Unused.push_back(&I);
+ std::function<bool(llvm::StringRef)> FragmentHeaderFilter;
+ if (!FragmentHeaderRegexes.empty()) {
+ FragmentHeaderFilter = [&](llvm::StringRef Path) {
+ return matchesAnyRegex(FragmentHeaderRegexes, Path);
+ };
}
+ const include_cleaner::AnalysisOptions AnalyzeOptions{
+ [&](const include_cleaner::Header &H) { return shouldIgnore(H); },
+ FragmentHeaderFilter};
+ const include_cleaner::AnalysisResults Results =
+ analyze(RootDecls, RecordedPreprocessor.MacroReferences,
+ RecordedPreprocessor.Includes, &RecordedPI, *PP, AnalyzeOptions);
const StringRef Code = SM->getBufferData(SM->getMainFileID());
auto FileStyle =
@@ -209,7 +166,7 @@ void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) {
FileStyle = format::getLLVMStyle();
if (UnusedIncludes) {
- for (const auto *Inc : Unused) {
+ for (const auto *Inc : Results.Unused) {
diag(Inc->HashLocation, "included header %0 is not used directly")
<< llvm::sys::path::filename(Inc->Spelled,
llvm::sys::path::Style::posix)
@@ -224,9 +181,33 @@ void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) {
FileStyle->IncludeStyle);
// Deduplicate insertions when running in bulk fix mode.
llvm::StringSet<> InsertedHeaders{};
- for (const auto &Inc : Missing) {
+ llvm::DenseSet<include_cleaner::Symbol> SeenSymbols;
+ auto DiagLocation = [&](const include_cleaner::MissingIncludeRef &Missing) {
+ if (Missing.Origin == include_cleaner::SymbolReferenceOrigin::Fragment &&
+ Missing.FragmentInclude) {
+ // Fragment refs are reported on their direct include site in the main
+ // file to preserve the main-file-only diagnostics contract.
+ return Missing.FragmentInclude->HashLocation;
+ }
+ return SM->getSpellingLoc(Missing.Ref.RefLocation);
+ };
+ for (const auto &Missing : Results.MissingRefs) {
+ // Process each symbol once to reduce noise in the findings.
+ // Tidy checks are used in two different workflows:
+ // - Ones that show all the findings for a given file. For such
+ // workflows there is not much point in showing all the occurences,
+ // as one is enough to indicate the issue.
+ // - Ones that show only the findings on changed pieces. For such
+ // workflows it's useful to show findings on every reference of a
+ // symbol as otherwise tools might give incosistent results
+ // depending on the parts of the file being edited. But it should
+ // still help surface findings for "new violations" (i.e.
+ // dependency did not exist in the code at all before).
+ if (DeduplicateFindings && !SeenSymbols.insert(Missing.Ref.Target).second)
+ continue;
+ assert(!Missing.Providers.empty() && "missing include without provider");
const std::string Spelling = include_cleaner::spellHeader(
- {Inc.Missing, PP->getHeaderSearchInfo(), MainFile});
+ {Missing.Providers.front(), PP->getHeaderSearchInfo(), MainFile});
const bool Angled = StringRef{Spelling}.starts_with('<');
// We might suggest insertion of an existing include in edge cases, e.g.,
// include is present in a PP-disabled region, or spelling of the header
@@ -236,9 +217,9 @@ void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) {
HeaderIncludes.insert(StringRef{Spelling}.trim("\"<>"), Angled,
tooling::IncludeDirective::Include)) {
const DiagnosticBuilder DB =
- diag(SM->getSpellingLoc(Inc.SymRef.RefLocation),
+ diag(DiagLocation(Missing),
"no header providing \"%0\" is directly included")
- << Inc.SymRef.Target.name();
+ << Missing.Ref.Target.name();
if (areDiagsSelfContained() ||
InsertedHeaders.insert(Replacement->getReplacementText()).second) {
DB << FixItHint::CreateInsertion(
@@ -248,6 +229,37 @@ void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) {
}
}
}
+
+ if (FragmentDependencyCommentFormat.empty())
+ return;
+
+ const include_cleaner::FixIncludesOptions FixOptions{
+ FragmentDependencyCommentFormat};
+ const include_cleaner::IncludeFixes Fixes = computeIncludeFixes(
+ Results, getCurrentMainFile(), Code, *FileStyle, FixOptions);
+ for (const auto &Comment : Fixes.FragmentComments) {
+ if (Comment.Status ==
+ include_cleaner::FragmentDependencyCommentStatus::AlreadyPresent) {
+ continue;
+ }
+ std::string FragmentList;
+ for (const auto *Fragment : Comment.Fragments) {
+ if (!FragmentList.empty())
+ FragmentList += ", ";
+ FragmentList += Fragment->quote();
+ }
+ const DiagnosticBuilder DB =
+ diag(Comment.Preserved->HashLocation,
+ "included header %0 is used only by fragment header(s) %1")
+ << llvm::sys::path::filename(Comment.Preserved->Spelled,
+ llvm::sys::path::Style::posix)
+ << FragmentList;
+ if (const auto Replacement = Comment.Replacement) {
+ DB << FixItHint::CreateInsertion(
+ SM->getComposedLoc(SM->getMainFileID(), Replacement->getOffset()),
+ Replacement->getReplacementText());
+ }
+ }
}
} // namespace clang::tidy::misc
diff --git a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h
index ab97565775e46..b3dc6e47f86cd 100644
--- a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h
+++ b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h
@@ -20,12 +20,14 @@
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/Support/Regex.h"
+#include <string>
#include <vector>
namespace clang::tidy::misc {
/// Checks for unused and missing includes. Generates findings only for
-/// the main file of a translation unit.
+/// the main file of a translation unit, optionally treating some direct
+/// includes as fragments of the main file for usage scanning.
/// Findings correspond to https://clangd.llvm.org/design/include-cleaner.
///
/// For the user-facing documentation see:
@@ -45,6 +47,8 @@ class IncludeCleanerCheck : public ClangTidyCheck {
include_cleaner::PragmaIncludes RecordedPI;
const Preprocessor *PP = nullptr;
std::vector<StringRef> IgnoreHeaders;
+ std::vector<StringRef> FragmentHeaderPatterns;
+ std::string FragmentDependencyCommentFormat;
// Whether emit only one finding per usage of a symbol.
const bool DeduplicateFindings;
// Whether to report unused includes.
@@ -52,6 +56,7 @@ class IncludeCleanerCheck : public ClangTidyCheck {
// Whether to report missing includes.
const bool MissingIncludes;
SmallVector<llvm::Regex> IgnoreHeadersRegex;
+ llvm::SmallVector<llvm::Regex> FragmentHeaderRegexes;
bool shouldIgnore(const include_cleaner::Header &H);
};
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index c508cd1bce510..7d80c3de2918d 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -393,7 +393,6 @@ Changes in existing checks
- Added support for analyzing function parameters with the `AnalyzeParameters`
option.
-
- Fixed false positive where an array of pointers to ``const`` was
incorrectly diagnosed as allowing the pointee to be made ``const``.
@@ -403,6 +402,12 @@ Changes in existing checks
- Fixed false positives when pointers were later passed or bound through
``const``-qualified pointer references.
+- Improved :doc:`misc-include-cleaner
+ <clang-tidy/checks/misc/include-cleaner>` check by adding the
+ ``FragmentHeaders`` option for fragment-aware usage scanning and the
+ ``FragmentDependencyCommentFormat`` option for annotating includes kept only
+ by those fragments.
+
- Improved :doc:`misc-multiple-inheritance
<clang-tidy/checks/misc/multiple-inheritance>` by avoiding false positives when
virtual inheritance causes concrete bases to be counted more than once.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst
index 4364610787058..594c0ff76bf5b 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst
@@ -4,7 +4,8 @@ misc-include-cleaner
====================
Checks for unused and missing includes. Generates findings only for
-the main file of a translation unit.
+the main file of a translation unit. Optionally, direct includes can be
+treated as fragments of the main file for usage scanning.
Findings correspond to https://clangd.llvm.org/design/include-cleaner.
Example:
@@ -34,6 +35,50 @@ Options
insertion/removal for all headers under the directory `foo`. Default is an
empty string, no headers will be ignored.
+.. option:: FragmentHeaders
+
+ A semicolon-separated list of regular expressions that match against
+ normalized resolved include paths (POSIX-style separators). Direct includes
+ of the main file that match are treated as fragments of the main file for
+ usage scanning. This is intended for non-self-contained generated
+ ``.inc``/``.def`` files or other include fragments. Only direct includes are
+ considered; includes inside fragments are not treated as fragments. Default
+ is ``""``.
+
+ Diagnostics remain anchored to the main file, but symbol uses inside
+ fragments can keep prerequisite includes in the main file from being
+ removed or marked missing. Note that include-cleaner does not support
+ ``// IWYU pragma: associated``.
+
+ Example configuration:
+
+ .. code-block:: yaml
+
+ CheckOptions:
+ - key: misc-include-cleaner.FragmentHeaders
+ value: 'gen-out/;generated/;\\.(inc|def)$'
+
+.. option:: FragmentDependencyCommentFormat
+
+ A trailing comment format to add to includes that are kept only because they
+ are used from fragment headers matched by :option:`FragmentHeaders`. The
+ value should not include the leading ``//``. An empty string disables these
+ diagnostics and fix-its. Default is ``""``.
+
+ Use ``{0}`` to substitute the comma-separated direct fragment include
+ spellings that keep the include alive.
+
+ Example configuration:
+
+ .. code-block:: yaml
+
+ CheckOptions:
+ - key: misc-include-cleaner.FragmentDependencyCommentFormat
+ value: 'needed by {0}'
+
+ For example, setting the value to ``IWYU pragma: keep`` inserts
+ ``// IWYU pragma: keep``.
+
.. option:: DeduplicateFindings
A boolean that controls whether the check should deduplicate findings for the
diff --git a/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp b/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp
index 00576916492e1..a9d5806632a56 100644
--- a/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp
+++ b/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp
@@ -339,6 +339,329 @@ TEST(IncludeCleanerCheckTest, UnusedIncludes) {
}
}
+TEST(IncludeCleanerCheckTest, FragmentHeadersPreserveIncludes) {
+ const std::string Fragment = "gen.inc";
+ const std::string Code = llvm::formatv(R"(
+#include <vector>
+#include "{0}"
+
+Gen G;
+)",
+ Fragment)
+ .str();
+ const char *FragmentCode = R"(
+struct Gen {
+ std::vector<int> Values;
+};
+)";
+ const char *VectorHeader = R"(
+#pragma once
+namespace std {
+template <typename T>
+struct vector {};
+} // namespace std
+)";
+
+ {
+ std::vector<ClangTidyError> Errors;
+ runCheckOnCode<IncludeCleanerCheck>(
+ Code, &Errors, "file.cpp", {}, ClangTidyOptions(),
+ {{Fragment, FragmentCode}, {"vector", VectorHeader}});
+ ASSERT_THAT(Errors.size(), testing::Eq(1U));
+ EXPECT_EQ(Errors.front().Message.Message,
+ "included header vector is not used directly");
+ }
+ {
+ std::vector<ClangTidyError> Errors;
+ ClangTidyOptions Opts;
+ Opts.CheckOptions["test-check-0.FragmentHeaders"] = "";
+ runCheckOnCode<IncludeCleanerCheck>(
+ Code, &Errors, "file.cpp", {}, Opts,
+ {{Fragment, FragmentCode}, {"vector", VectorHeader}});
+ ASSERT_THAT(Errors.size(), testing::Eq(1U));
+ }
+ {
+ std::vector<ClangTidyError> Errors;
+ ClangTidyOptions Opts;
+ Opts.CheckOptions["test-check-0.FragmentHeaders"] = ".*\\.inc$";
+ runCheckOnCode<IncludeCleanerCheck>(
+ Code, &Errors, "file.cpp", {}, Opts,
+ {{Fragment, FragmentCode}, {"vector", VectorHeader}});
+ ASSERT_THAT(Errors.size(), testing::Eq(0U));
+ }
+}
+
+TEST(IncludeCleanerCheckTest, FragmentHeadersGeneratedPaths) {
+ const std::string Fragment =
+ appendPathFileSystemIndependent({"gen-out", "bin", "gen.inc"});
+ const std::string Code = llvm::formatv(R"(
+#include <vector>
+#include "{0}"
+
+Gen G;
+)"...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/196767
More information about the llvm-branch-commits
mailing list