[clang-tools-extra] 16b5e18 - [include-cleaner] Loose matching for verbatim headers
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Thu Jul 27 09:19:31 PDT 2023
Author: Sam McCall
Date: 2023-07-27T18:19:24+02:00
New Revision: 16b5e1897b7efaa4250b04dface0c807a02ae31a
URL: https://github.com/llvm/llvm-project/commit/16b5e1897b7efaa4250b04dface0c807a02ae31a
DIFF: https://github.com/llvm/llvm-project/commit/16b5e1897b7efaa4250b04dface0c807a02ae31a.diff
LOG: [include-cleaner] Loose matching for verbatim headers
A verbatim header usually corresponds to a symbol from a header with
a pragma "IWYU pragma: private, include <foo.h>".
Currently this is only satisfied if the main file contains exactly
#include <foo.h>
In practice this is too strict, we also want to allow
#include "path/to/foo.h"
so long as they resolve to the same file.
We cannot be 100% sure without doing IO, and we're not willing to do
that, but we can detect the common cases based on paths.
Differential Revision: https://reviews.llvm.org/D155819
Added:
Modified:
clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
clang-tools-extra/include-cleaner/lib/Record.cpp
clang-tools-extra/include-cleaner/lib/Types.cpp
clang-tools-extra/include-cleaner/unittests/TypesTest.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
index 02269cea4bdf84..cd3fd8600060b5 100644
--- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
+++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
@@ -31,7 +31,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include <memory>
+#include <string>
#include <utility>
#include <variant>
#include <vector>
@@ -167,6 +169,20 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Include &);
/// Supports efficiently hit-testing Headers against Includes.
class Includes {
public:
+ /// Registers a directory on the include path (-I etc) from HeaderSearch.
+ /// This allows reasoning about equivalence of e.g. "path/a/b.h" and "a/b.h".
+ /// This must be called before calling add() in order to take effect.
+ ///
+ /// The paths may be relative or absolute, but the paths passed to
+ /// addSearchDirectory() and add() (that is: Include.Resolved->getName())
+ /// should be consistent, as they are compared lexically.
+ /// Generally, this is satisfied if you obtain paths through HeaderSearch
+ /// and FileEntries through PPCallbacks::IncludeDirective().
+ void addSearchDirectory(llvm::StringRef);
+
+ /// Registers an include directive seen in the main file.
+ ///
+ /// This should only be called after all search directories are added.
void add(const Include &);
/// All #includes seen, in the order they appear.
@@ -183,9 +199,13 @@ class Includes {
const Include *atLine(unsigned OneBasedIndex) const;
private:
+ llvm::StringSet<> SearchPath;
+
std::vector<Include> All;
// Lookup structures for match(), values are index into All.
llvm::StringMap<llvm::SmallVector<unsigned>> BySpelling;
+ // Heuristic spellings that likely resolve to the given file.
+ llvm::StringMap<llvm::SmallVector<unsigned>> BySpellingAlternate;
llvm::DenseMap<const FileEntry *, llvm::SmallVector<unsigned>> ByFile;
llvm::DenseMap<unsigned, unsigned> ByLine;
};
diff --git a/clang-tools-extra/include-cleaner/lib/Record.cpp b/clang-tools-extra/include-cleaner/lib/Record.cpp
index 50a15229cbe55b..78f21bc2262a1f 100644
--- a/clang-tools-extra/include-cleaner/lib/Record.cpp
+++ b/clang-tools-extra/include-cleaner/lib/Record.cpp
@@ -14,13 +14,16 @@
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Lex/DirectoryLookup.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Tooling/Inclusions/HeaderAnalysis.h"
#include "clang/Tooling/Inclusions/StandardLibrary.h"
+#include "llvm/ADT/StringRef.h"
#include <memory>
#include <utility>
+#include <vector>
namespace clang::include_cleaner {
namespace {
@@ -28,7 +31,11 @@ namespace {
class PPRecorder : public PPCallbacks {
public:
PPRecorder(RecordedPP &Recorded, const Preprocessor &PP)
- : Recorded(Recorded), PP(PP), SM(PP.getSourceManager()) {}
+ : Recorded(Recorded), PP(PP), SM(PP.getSourceManager()) {
+ for (const auto &Dir : PP.getHeaderSearchInfo().search_dir_range())
+ if (Dir.getLookupType() == DirectoryLookup::LT_NormalDir)
+ Recorded.Includes.addSearchDirectory(Dir.getDirRef()->getName());
+ }
void FileChanged(SourceLocation Loc, FileChangeReason Reason,
SrcMgr::CharacteristicKind FileType,
diff --git a/clang-tools-extra/include-cleaner/lib/Types.cpp b/clang-tools-extra/include-cleaner/lib/Types.cpp
index bcd15920797b46..2061fdc443886c 100644
--- a/clang-tools-extra/include-cleaner/lib/Types.cpp
+++ b/clang-tools-extra/include-cleaner/lib/Types.cpp
@@ -10,8 +10,15 @@
#include "TypesInternal.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/FileEntry.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
+#include <vector>
namespace clang::include_cleaner {
@@ -94,16 +101,51 @@ std::string Include::quote() const {
.str();
}
+static llvm::SmallString<128> normalizePath(llvm::StringRef Path) {
+ namespace path = llvm::sys::path;
+
+ llvm::SmallString<128> P = Path;
+ path::remove_dots(P, /*remove_dot_dot=*/true);
+ path::native(P, path::Style::posix);
+ while (!P.empty() && P.back() == '/')
+ P.pop_back();
+ return P;
+}
+
+void Includes::addSearchDirectory(llvm::StringRef Path) {
+ SearchPath.try_emplace(normalizePath(Path));
+}
+
void Includes::add(const Include &I) {
+ namespace path = llvm::sys::path;
+
unsigned Index = All.size();
All.push_back(I);
auto BySpellingIt = BySpelling.try_emplace(I.Spelled).first;
All.back().Spelled = BySpellingIt->first(); // Now we own the backing string.
BySpellingIt->second.push_back(Index);
- if (I.Resolved)
- ByFile[&I.Resolved->getFileEntry()].push_back(Index);
ByLine[I.Line] = Index;
+
+ if (!I.Resolved)
+ return;
+ ByFile[&I.Resolved->getFileEntry()].push_back(Index);
+
+ // While verbatim headers ideally should match #include spelling exactly,
+ // we want to be tolerant of different spellings of the same file.
+ //
+ // If the search path includes "/a/b" and "/a/b/c/d",
+ // verbatim "e/f" should match (spelled=c/d/e/f, resolved=/a/b/c/d/e/f).
+ // We assume entry's (normalized) name will match the search dirs.
+ auto Path = normalizePath(I.Resolved->getName());
+ for (llvm::StringRef Parent = path::parent_path(Path); !Parent.empty();
+ Parent = path::parent_path(Parent)) {
+ if (!SearchPath.contains(Parent))
+ continue;
+ llvm::StringRef Rel =
+ llvm::StringRef(Path).drop_front(Parent.size()).ltrim('/');
+ BySpellingAlternate[Rel].push_back(Index);
+ }
}
const Include *Includes::atLine(unsigned OneBasedIndex) const {
@@ -122,11 +164,16 @@ llvm::SmallVector<const Include *> Includes::match(Header H) const {
for (unsigned I : BySpelling.lookup(H.standard().name().trim("<>")))
Result.push_back(&All[I]);
break;
- case Header::Verbatim:
- for (unsigned I : BySpelling.lookup(H.verbatim().trim("\"<>")))
+ case Header::Verbatim: {
+ llvm::StringRef Spelling = H.verbatim().trim("\"<>");
+ for (unsigned I : BySpelling.lookup(Spelling))
Result.push_back(&All[I]);
+ for (unsigned I : BySpellingAlternate.lookup(Spelling))
+ if (!llvm::is_contained(Result, &All[I]))
+ Result.push_back(&All[I]);
break;
}
+ }
return Result;
}
diff --git a/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp b/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp
index a2aec2eaf316b9..56b5fbe603adb8 100644
--- a/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp
@@ -8,6 +8,7 @@
#include "clang-include-cleaner/Types.h"
#include "clang/Basic/FileManager.h"
+#include "clang/Basic/FileSystemOptions.h"
#include "clang/Tooling/Inclusions/StandardLibrary.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/Support/VirtualFileSystem.h"
@@ -17,6 +18,8 @@
namespace clang::include_cleaner {
namespace {
using testing::ElementsAre;
+using testing::IsEmpty;
+using testing::UnorderedElementsAre;
// Matches an Include* on the specified line;
MATCHER_P(line, N, "") { return arg->Line == (unsigned)N; }
@@ -44,5 +47,57 @@ TEST(RecordedIncludesTest, Match) {
ElementsAre(line(4), line(5)));
}
+TEST(RecordedIncludesTest, MatchVerbatim) {
+ auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+ FileManager FM(FileSystemOptions{});
+ Includes Inc;
+
+ // By default, a verbatim header only matches includes with the same spelling.
+ auto Foo =
+ FM.getVirtualFileRef("repo/lib/include/rel/foo.h", /*Size=*/0, time_t{});
+ Inc.add(Include{"lib/include/rel/foo.h", Foo, SourceLocation(), 1});
+ Inc.add(Include{"rel/foo.h", Foo, SourceLocation(), 2});
+ EXPECT_THAT(Inc.match(Header("<rel/foo.h>")), ElementsAre(line(2)));
+
+ // A verbatim header can match another spelling if the search path
+ // suggests it's equivalent.
+ auto Bar =
+ FM.getVirtualFileRef("repo/lib/include/rel/bar.h", /*Size=*/0, time_t{});
+ Inc.addSearchDirectory("repo/");
+ Inc.addSearchDirectory("repo/lib/include");
+ Inc.add(Include{"lib/include/rel/bar.h", Bar, SourceLocation(), 3});
+ Inc.add(Include{"rel/bar.h", Bar, SourceLocation(), 4});
+ EXPECT_THAT(Inc.match(Header("<rel/bar.h>")),
+ UnorderedElementsAre(line(3), line(4)));
+
+ // We don't apply this logic to system headers, though.
+ auto Vector =
+ FM.getVirtualFileRef("repo/lib/include/vector", /*Size=*/0, time_t{});
+ Inc.add(Include{"lib/include/vector", Vector, SourceLocation(), 5});
+ EXPECT_THAT(Inc.match(Header(*tooling::stdlib::Header::named("<vector>"))),
+ IsEmpty());
+}
+
+TEST(RecordedIncludesTest, MatchVerbatimMixedAbsoluteRelative) {
+ auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+ FS->setCurrentWorkingDirectory("/working");
+ FileManager FM(FileSystemOptions{});
+ Includes Inc;
+
+ auto Foo =
+ FM.getVirtualFileRef("/working/rel1/rel2/foo.h", /*Size=*/0, time_t{});
+ Inc.addSearchDirectory("rel1");
+ Inc.addSearchDirectory("rel1/rel2");
+ Inc.add(Include{"rel2/foo.h", Foo, SourceLocation(), 1});
+ EXPECT_THAT(Inc.match(Header("<foo.h>")), IsEmpty());
+
+ Inc = Includes{};
+ auto Bar = FM.getVirtualFileRef("rel1/rel2/bar.h", /*Size=*/0, time_t{});
+ Inc.addSearchDirectory("/working/rel1");
+ Inc.addSearchDirectory("/working/rel1/rel2");
+ Inc.add(Include{"rel2/bar.h", Bar, SourceLocation(), 1});
+ EXPECT_THAT(Inc.match(Header("<bar.h>")), IsEmpty());
+}
+
} // namespace
} // namespace clang::include_cleaner
More information about the cfe-commits mailing list