[llvm-branch-commits] [clang-tools-extra] [include-cleaner] add fragment header analysis (PR #196764)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat May 9 15:45:47 PDT 2026
llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-tools-extra
Author: Daniil Dudkin (unterumarmung)
<details>
<summary>Changes</summary>
---
Patch is 30.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/196764.diff
4 Files Affected:
- (modified) clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h (+29)
- (modified) clang-tools-extra/include-cleaner/lib/Analysis.cpp (+219-59)
- (modified) clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp (+1-1)
- (modified) clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp (+318-7)
``````````diff
diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h
index 8e4912fa7bd84..1d28d87c025ca 100644
--- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h
+++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h
@@ -62,24 +62,53 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots,
const PragmaIncludes *PI, const Preprocessor &PP,
UsedSymbolCB CB);
+/// A location kind where a symbol reference is observed.
+enum class SymbolReferenceOrigin {
+ MainFile,
+ Preamble,
+ Fragment,
+};
+
/// A missing include insertion candidate.
struct MissingInclude {
std::string Spelling;
Header Provider;
};
+/// A missing include finding with per-reference provenance.
+struct MissingIncludeRef {
+ SymbolReference Ref;
+ llvm::SmallVector<Header> Providers;
+ SymbolReferenceOrigin Origin = SymbolReferenceOrigin::MainFile;
+ /// Null if the fragment file has multiple direct include sites.
+ const Include *FragmentInclude = nullptr;
+};
+
+/// An include kept alive only by fragment usage.
+struct FragmentDependency {
+ const Include *Preserved = nullptr;
+ llvm::SmallVector<const Include *> Fragments;
+};
+
/// The result of include-cleaner analysis for one main file.
struct AnalysisResults {
std::vector<const Include *> Unused;
/// Deduplicated insertion plan, e.g. "<vector>" paired with the chosen
/// provider Header.
std::vector<MissingInclude> MissingIncludes;
+ /// Per-reference provenance for consumers that need richer diagnostics.
+ std::vector<MissingIncludeRef> MissingRefs;
+ /// Include-preservation provenance for fragment-only uses.
+ std::vector<FragmentDependency> FragmentDependencies;
};
/// Analysis configuration shared by include-cleaner consumers.
struct AnalysisOptions {
/// No analysis will be performed for headers that satisfy the predicate.
std::function<bool(const Header &)> HeaderFilter;
+ /// A predicate matched against normalized resolved paths, and normalized
+ /// spelled paths as a fallback, to identify direct include fragments.
+ std::function<bool(llvm::StringRef)> FragmentHeaderFilter;
};
/// Determine which headers should be inserted or removed from the main file.
diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp
index cd0b3396bf418..321b67a5787d3 100644
--- a/clang-tools-extra/include-cleaner/lib/Analysis.cpp
+++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp
@@ -8,6 +8,7 @@
#include "clang-include-cleaner/Analysis.h"
#include "AnalysisInternal.h"
+#include "TypesInternal.h"
#include "clang-include-cleaner/IncludeSpeller.h"
#include "clang-include-cleaner/Record.h"
#include "clang-include-cleaner/Types.h"
@@ -22,9 +23,11 @@
#include "clang/Tooling/Core/Replacement.h"
#include "clang/Tooling/Inclusions/StandardLibrary.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
@@ -32,13 +35,91 @@
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <climits>
+#include <optional>
#include <string>
namespace clang::include_cleaner {
namespace {
+
+struct ClassifiedReference {
+ SymbolReferenceOrigin Origin;
+ // Null if the fragment file has multiple direct include sites.
+ const Include *FragmentInclude = nullptr;
+};
+
+class FragmentTracker {
+public:
+ FragmentTracker(const Includes &Inc, const SourceManager &SM,
+ const std::function<bool(llvm::StringRef)> &HeaderFilter)
+ : SM(SM) {
+ if (!HeaderFilter)
+ return;
+
+ for (const Include &I : Inc.all()) {
+ if (!I.Resolved ||
+ locateInMainFile(I.HashLocation, SM) != MainFileLocation::MainFile) {
+ continue;
+ }
+
+ // Match the canonical path first, but fall back to the spelled include
+ // so generated paths can still be configured even when resolution loses
+ // that spelling detail.
+ const llvm::SmallString<128> ResolvedPath =
+ normalizePath(I.Resolved->getName());
+ bool IsFragment = HeaderFilter(ResolvedPath);
+ if (!IsFragment) {
+ const llvm::SmallString<128> SpelledPath = normalizePath(I.Spelled);
+ if (!SpelledPath.empty())
+ IsFragment = HeaderFilter(SpelledPath);
+ }
+ if (!IsFragment)
+ continue;
+
+ // Fragments are intentionally direct-includes-only for now. If a
+ // fragment includes another file, that nested include keeps normal
+ // header semantics.
+ IncludeSitesByFile[&I.Resolved->getFileEntry()].push_back(&I);
+ DirectIncludes.insert(&I);
+ }
+ }
+
+ std::optional<ClassifiedReference> classify(FileID FID) const {
+ if (FID == SM.getMainFileID())
+ return ClassifiedReference{SymbolReferenceOrigin::MainFile};
+ if (FID == SM.getPreambleFileID())
+ return ClassifiedReference{SymbolReferenceOrigin::Preamble};
+ auto FE = SM.getFileEntryRefForID(FID);
+ if (!FE)
+ return std::nullopt;
+ auto It = IncludeSitesByFile.find(&FE->getFileEntry());
+ if (It == IncludeSitesByFile.end())
+ return std::nullopt;
+ if (It->second.size() != 1)
+ return ClassifiedReference{SymbolReferenceOrigin::Fragment};
+ return ClassifiedReference{SymbolReferenceOrigin::Fragment,
+ It->second.front()};
+ }
+
+ bool isDirectInclude(const Include *I) const {
+ return DirectIncludes.contains(I);
+ }
+
+private:
+ const SourceManager &SM;
+ llvm::DenseMap<const FileEntry *, llvm::SmallVector<const Include *>>
+ IncludeSitesByFile;
+ llvm::DenseSet<const Include *> DirectIncludes;
+};
+
+using UsedSymbolWithOriginCB = llvm::function_ref<void(
+ const SymbolReference &Ref, llvm::ArrayRef<Header> Providers,
+ const ClassifiedReference &Info)>;
+
bool shouldIgnoreMacroReference(const Preprocessor &PP, const Macro &M) {
- auto *MI = PP.getMacroInfo(M.Name);
+ auto &MutablePP = const_cast<Preprocessor &>(PP);
+ auto MD = MutablePP.getMacroDefinitionAtLoc(M.Name, M.Definition);
+ auto *MI = MD ? MD.getMacroInfo() : PP.getMacroInfo(M.Name);
// Macros that expand to themselves are confusing from user's point of view.
// They usually aspect the usage to be attributed to the underlying decl and
// not the macro definition. So ignore such macros (e.g. std{in,out,err} are
@@ -47,15 +128,12 @@ bool shouldIgnoreMacroReference(const Preprocessor &PP, const Macro &M) {
return MI && MI->getNumTokens() == 1 && MI->isObjectLike() &&
MI->getReplacementToken(0).getIdentifierInfo() == M.Name;
}
-} // namespace
-void walkUsed(llvm::ArrayRef<Decl *> ASTRoots,
- llvm::ArrayRef<SymbolReference> MacroRefs,
- const PragmaIncludes *PI, const Preprocessor &PP,
- UsedSymbolCB CB) {
+void walkUsedWithOrigins(
+ llvm::ArrayRef<Decl *> ASTRoots, llvm::ArrayRef<SymbolReference> MacroRefs,
+ const PragmaIncludes *PI, const Preprocessor &PP, UsedSymbolWithOriginCB CB,
+ llvm::function_ref<std::optional<ClassifiedReference>(FileID)> Classify) {
const auto &SM = PP.getSourceManager();
- // This is duplicated in writeHTMLReport, changes should be mirrored there.
- tooling::stdlib::Recognizer Recognizer;
for (auto *Root : ASTRoots) {
walkAST(*Root, [&](SourceLocation Loc, NamedDecl &ND, RefType RT) {
auto SpellLoc = SM.getSpellingLoc(Loc);
@@ -71,25 +149,28 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots,
SpellLoc = SM.getSpellingLoc(Loc);
}
auto FID = SM.getFileID(SpellLoc);
- if (FID != SM.getMainFileID() && FID != SM.getPreambleFileID())
+ auto Info = Classify(FID);
+ if (!Info)
return;
// FIXME: Most of the work done here is repetitive. It might be useful to
// have a cache/batching.
SymbolReference SymRef{ND, Loc, RT};
- return CB(SymRef, headersForSymbol(ND, PP, PI));
+ return CB(SymRef, headersForSymbol(ND, PP, PI), *Info);
});
}
+
for (const SymbolReference &MacroRef : MacroRefs) {
assert(MacroRef.Target.kind() == Symbol::Macro);
- if (!SM.isWrittenInMainFile(SM.getSpellingLoc(MacroRef.RefLocation)) ||
- shouldIgnoreMacroReference(PP, MacroRef.Target.macro()))
+ if (shouldIgnoreMacroReference(PP, MacroRef.Target.macro()))
continue;
- CB(MacroRef, headersForSymbol(MacroRef.Target, PP, PI));
+ auto FID = SM.getFileID(SM.getSpellingLoc(MacroRef.RefLocation));
+ auto Info = Classify(FID);
+ if (!Info)
+ continue;
+ CB(MacroRef, headersForSymbol(MacroRef.Target, PP, PI), *Info);
}
}
-namespace {
-
bool isFilteredInclude(const Include &I,
llvm::function_ref<bool(const Header &)> HeaderFilter,
const Preprocessor &PP) {
@@ -97,8 +178,9 @@ bool isFilteredInclude(const Include &I,
auto Lang = PP.getLangOpts().CPlusPlus ? tooling::stdlib::Lang::CXX
: tooling::stdlib::Lang::C;
if (auto StdHeader = tooling::stdlib::Header::named(I.quote(), Lang);
- StdHeader && HeaderFilter(*StdHeader))
+ StdHeader && HeaderFilter(*StdHeader)) {
return true;
+ }
}
return I.Resolved && HeaderFilter(*I.Resolved);
}
@@ -109,32 +191,89 @@ bool shouldSuppressIncludeDiagnostic(
const PragmaIncludes *PI, const Preprocessor &PP,
OptionalDirectoryEntryRef ResourceDir) {
if (!I.Resolved || I.Resolved->getDir() == ResourceDir ||
- isFilteredInclude(I, HeaderFilter, PP))
+ isFilteredInclude(I, HeaderFilter, PP)) {
return true;
+ }
if (!PI)
return false;
if (PI->shouldKeep(*I.Resolved))
return true;
// Check if main file is the public interface for a private header. If so
- // we shouldn't diagnose it as unused.
+ // we shouldn't diagnose it as unused or record fragment dependencies.
if (auto PHeader = PI->getPublic(*I.Resolved); !PHeader.empty()) {
PHeader = PHeader.trim("<>\"");
- // Since most private -> public mappings happen in a verbatim way, we
- // check textually here. This might go wrong in presence of symlinks or
- // header mappings. But that's not different than rest of the places.
if (MainFile.getName().ends_with(PHeader))
return true;
}
return false;
}
+void walkUsedInFiles(llvm::ArrayRef<Decl *> ASTRoots,
+ llvm::ArrayRef<SymbolReference> MacroRefs,
+ const PragmaIncludes *PI, const Preprocessor &PP,
+ UsedSymbolCB CB,
+ llvm::function_ref<bool(FileID)> IsMainFile) {
+ walkUsedWithOrigins(
+ ASTRoots, MacroRefs, PI, PP,
+ [&](const SymbolReference &Ref, llvm::ArrayRef<Header> Providers,
+ const ClassifiedReference &) { CB(Ref, Providers); },
+ [&](FileID FID) -> std::optional<ClassifiedReference> {
+ if (!IsMainFile(FID))
+ return std::nullopt;
+ return ClassifiedReference{SymbolReferenceOrigin::MainFile};
+ });
+}
+
+} // namespace
+
+void walkUsed(llvm::ArrayRef<Decl *> ASTRoots,
+ llvm::ArrayRef<SymbolReference> MacroRefs,
+ const PragmaIncludes *PI, const Preprocessor &PP,
+ UsedSymbolCB CB) {
+ const auto &SM = PP.getSourceManager();
+ walkUsedInFiles(ASTRoots, MacroRefs, PI, PP, CB, [&](FileID FID) {
+ return FID == SM.getMainFileID() || FID == SM.getPreambleFileID();
+ });
+}
+
+namespace {
+
class IncludeUsage {
public:
- void mark(const Include *I) { Used.insert(I); }
+ void mark(const Include *I, const ClassifiedReference &Info) {
+ Used.insert(I);
+ if (Info.Origin != SymbolReferenceOrigin::Fragment) {
+ UsedByMainOrPreamble.insert(I);
+ return;
+ }
+
+ UsedByFragment.insert(I);
+ if (!Info.FragmentInclude)
+ return;
+ auto &Reasons = ByPreserved[I];
+ if (!llvm::is_contained(Reasons, Info.FragmentInclude))
+ Reasons.push_back(Info.FragmentInclude);
+ }
+
bool contains(const Include *I) const { return Used.contains(I); }
+ bool isFragmentOnly(const Include *I) const {
+ return UsedByFragment.contains(I) && !UsedByMainOrPreamble.contains(I);
+ }
+
+ llvm::SmallVector<const Include *> fragmentSites(const Include *I) const {
+ auto It = ByPreserved.find(I);
+ if (It == ByPreserved.end())
+ return {};
+ return It->second;
+ }
+
private:
llvm::DenseSet<const Include *> Used;
+ llvm::DenseSet<const Include *> UsedByMainOrPreamble;
+ llvm::DenseSet<const Include *> UsedByFragment;
+ llvm::DenseMap<const Include *, llvm::SmallVector<const Include *>>
+ ByPreserved;
};
class MissingIncludeCollector {
@@ -170,48 +309,69 @@ AnalysisResults analyze(llvm::ArrayRef<Decl *> ASTRoots,
auto HeaderFilter = [&](const Header &H) {
return Options.HeaderFilter && Options.HeaderFilter(H);
};
+ FragmentTracker Fragments(Inc, SM, Options.FragmentHeaderFilter);
IncludeUsage Usage;
MissingIncludeCollector MissingIncludes;
+ std::vector<MissingIncludeRef> MissingRefs;
OptionalDirectoryEntryRef ResourceDir =
PP.getHeaderSearchInfo().getModuleMap().getBuiltinDir();
- walkUsed(ASTRoots, MacroRefs, PI, PP,
- [&](const SymbolReference &Ref, llvm::ArrayRef<Header> Providers) {
- bool Satisfied = false;
- for (const Header &H : Providers) {
- if (H.kind() == Header::Physical &&
- (H.physical() == MainFile ||
- H.physical().getDir() == ResourceDir)) {
- Satisfied = true;
- }
- for (const Include *I : Inc.match(H)) {
- Usage.mark(I);
- Satisfied = true;
- }
- }
- // Bail out if we can't (or need not) insert an include.
- if (Satisfied || Providers.empty() || Ref.RT != RefType::Explicit)
- return;
- if (HeaderFilter(Providers.front()))
- return;
- // Check if we have any headers with the same spelling, in edge
- // cases like `#include_next "foo.h"`, the user can't ever
- // include the physical foo.h, but can have a spelling that
- // refers to it.
- auto Spelling = spellHeader(
- {Providers.front(), PP.getHeaderSearchInfo(), MainFile});
- for (const Include *I : Inc.match(Header{Spelling})) {
- Usage.mark(I);
- Satisfied = true;
- }
- if (!Satisfied)
- MissingIncludes.add(Spelling, Providers.front());
- });
-
- AnalysisResults Results;
+
+ walkUsedWithOrigins(
+ ASTRoots, MacroRefs, PI, PP,
+ [&](const SymbolReference &Ref, llvm::ArrayRef<Header> Providers,
+ const ClassifiedReference &Info) {
+ bool Satisfied = false;
+ for (const Header &H : Providers) {
+ if (H.kind() == Header::Physical &&
+ (H.physical() == MainFile ||
+ H.physical().getDir() == ResourceDir)) {
+ Satisfied = true;
+ }
+ for (const Include *I : Inc.match(H)) {
+ Usage.mark(I, Info);
+ Satisfied = true;
+ }
+ }
+
+ if (Satisfied || Providers.empty() || Ref.RT != RefType::Explicit)
+ return;
+ if (HeaderFilter(Providers.front()))
+ return;
+
+ auto Spelling = spellHeader(
+ {Providers.front(), PP.getHeaderSearchInfo(), MainFile});
+ for (const Include *I : Inc.match(Header{Spelling})) {
+ Usage.mark(I, Info);
+ Satisfied = true;
+ }
+ if (Satisfied)
+ return;
+
+ // MissingIncludes drives edits, while MissingRefs preserves where the
+ // unsatisfied use came from for higher-level diagnostics.
+ MissingIncludes.add(Spelling, Providers.front());
+ MissingRefs.push_back(
+ MissingIncludeRef{Ref, llvm::SmallVector<Header>(Providers),
+ Info.Origin, Info.FragmentInclude});
+ },
+ [&](FileID FID) { return Fragments.classify(FID); });
+
+ AnalysisResults Results{{}, {}, std::move(MissingRefs), {}};
for (const Include &I : Inc.all()) {
- if (Usage.contains(&I) ||
- shouldSuppressIncludeDiagnostic(I, MainFile, HeaderFilter, PI, PP,
- ResourceDir))
+ bool Suppressed = shouldSuppressIncludeDiagnostic(I, MainFile, HeaderFilter,
+ PI, PP, ResourceDir);
+ if (Usage.contains(&I)) {
+ const llvm::SmallVector<const Include *> FragmentSites =
+ Usage.fragmentSites(&I);
+ // Ambiguous fragment sites still preserve the header, but get no comment.
+ if (!Suppressed && Usage.isFragmentOnly(&I) &&
+ !Fragments.isDirectInclude(&I) && !FragmentSites.empty()) {
+ Results.FragmentDependencies.push_back(
+ FragmentDependency{&I, FragmentSites});
+ }
+ continue;
+ }
+ if (Suppressed)
continue;
Results.Unused.push_back(&I);
}
diff --git a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
index d72d6adf33c9b..49bd5495bcc13 100644
--- a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
+++ b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
@@ -192,7 +192,7 @@ class Action : public clang::ASTFrontendAction {
SM.getFileManager().makeAbsolutePath(AbsPath);
llvm::StringRef Code = SM.getBufferData(SM.getMainFileID());
- AnalysisOptions AnalyzeOptions{HeaderFilter};
+ AnalysisOptions AnalyzeOptions{HeaderFilter, {}};
auto Results =
analyze(AST.Roots, PP.MacroReferences, PP.Includes, &PI,
getCompilerInstance().getPreprocessor(), AnalyzeOptions);
diff --git a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp
index 3fcf82251d14b..0c68fb58032fb 100644
--- a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp
@@ -39,6 +39,14 @@
#include <vector>
namespace clang::include_cleaner {
+
+static std::vector<Decl *> topLevelDecls(TestAST &AST) {
+ std::vector<Decl *> Decls;
+ for (auto *D : AST.context().getTranslationUnitDecl()->decls())
+ Decls.push_back(D);
+ return Decls;
+}
+
namespace {
using testing::_;
using testing::AllOf;
@@ -375,9 +383,7 @@ TEST_F(AnalyzeTest, SpellingIncludesWithSymlinks) {
/*ModificationTime=*/{});
TestAST AST(Inputs);
- std::vector<Decl *> DeclsInTU;
- for (auto *D : AST.context().getTranslationUnitDecl()->decls())
- DeclsInTU.push_back(D);
+ std::vector<Decl *> DeclsInTU = topLevelDecls(AST);
auto Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor());
// Check that we're spelling header using the symlink, and not underlying
// path.
@@ -388,8 +394,10 @@ TEST_F(AnalyzeTest, SpellingIncludesWithSymlinks) {
{
// Make sure filtering is also applied to symlink, not underlying file.
- AnalysisOptions Options{
- [](const Header &H) { return H.resolvedPath() == "inner.h"; }};
+ AnalysisOptions Options;
+ Options.HeaderFilter = [](const Header &H) {
+ return H.resolvedPath() == "inner.h";
+ };
Results =
analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), Options);
EXPECT_THAT(Results.MissingIncludes,
@@ -398,8 +406,10 @@ TEST_F(AnalyzeTest, SpellingIncludesWithSymlinks) {
EXPECT_THAT(Results.Unused, Not(testing::IsEmpty()));
}
{
- AnalysisOptions Options{
- [](const Header &H) { return H.resolvedPath() == "header.h"; }};
+ AnalysisOptions Options;
+ Options.HeaderFilter = [](const Header &H) {
+ return H.resolvedPath() == "header.h";
+ };
Results =
analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(), Options);
// header.h should be ignored now.
@@ -409,6 +419,307 @@ TEST_F(AnalyzeTest, SpellingIn...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/196764
More information about the llvm-branch-commits
mailing list