[clang-tools-extra] 16b5e18 - [include-cleaner] Loose matching for verbatim headers

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Thu Jul 27 09:19:31 PDT 2023


Author: Sam McCall
Date: 2023-07-27T18:19:24+02:00
New Revision: 16b5e1897b7efaa4250b04dface0c807a02ae31a

URL: https://github.com/llvm/llvm-project/commit/16b5e1897b7efaa4250b04dface0c807a02ae31a
DIFF: https://github.com/llvm/llvm-project/commit/16b5e1897b7efaa4250b04dface0c807a02ae31a.diff

LOG: [include-cleaner] Loose matching for verbatim headers

A verbatim header usually corresponds to a symbol from a header with
a pragma "IWYU pragma: private, include <foo.h>".

Currently this is only satisfied if the main file contains exactly
  #include <foo.h>
In practice this is too strict, we also want to allow
  #include "path/to/foo.h"
so long as they resolve to the same file.

We cannot be 100% sure without doing IO, and we're not willing to do
that, but we can detect the common cases based on paths.

Differential Revision: https://reviews.llvm.org/D155819

Added: 
    

Modified: 
    clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
    clang-tools-extra/include-cleaner/lib/Record.cpp
    clang-tools-extra/include-cleaner/lib/Types.cpp
    clang-tools-extra/include-cleaner/unittests/TypesTest.cpp

Removed: 
    


################################################################################
diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
index 02269cea4bdf84..cd3fd8600060b5 100644
--- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
+++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
@@ -31,7 +31,9 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 #include <memory>
+#include <string>
 #include <utility>
 #include <variant>
 #include <vector>
@@ -167,6 +169,20 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Include &);
 /// Supports efficiently hit-testing Headers against Includes.
 class Includes {
 public:
+  /// Registers a directory on the include path (-I etc) from HeaderSearch.
+  /// This allows reasoning about equivalence of e.g. "path/a/b.h" and "a/b.h".
+  /// This must be called before calling add() in order to take effect.
+  ///
+  /// The paths may be relative or absolute, but the paths passed to
+  /// addSearchDirectory() and add() (that is: Include.Resolved->getName())
+  /// should be consistent, as they are compared lexically.
+  /// Generally, this is satisfied if you obtain paths through HeaderSearch
+  /// and FileEntries through PPCallbacks::IncludeDirective().
+  void addSearchDirectory(llvm::StringRef);
+
+  /// Registers an include directive seen in the main file.
+  ///
+  /// This should only be called after all search directories are added.
   void add(const Include &);
 
   /// All #includes seen, in the order they appear.
@@ -183,9 +199,13 @@ class Includes {
   const Include *atLine(unsigned OneBasedIndex) const;
 
 private:
+  llvm::StringSet<> SearchPath;
+
   std::vector<Include> All;
   // Lookup structures for match(), values are index into All.
   llvm::StringMap<llvm::SmallVector<unsigned>> BySpelling;
+  // Heuristic spellings that likely resolve to the given file.
+  llvm::StringMap<llvm::SmallVector<unsigned>> BySpellingAlternate;
   llvm::DenseMap<const FileEntry *, llvm::SmallVector<unsigned>> ByFile;
   llvm::DenseMap<unsigned, unsigned> ByLine;
 };

diff --git a/clang-tools-extra/include-cleaner/lib/Record.cpp b/clang-tools-extra/include-cleaner/lib/Record.cpp
index 50a15229cbe55b..78f21bc2262a1f 100644
--- a/clang-tools-extra/include-cleaner/lib/Record.cpp
+++ b/clang-tools-extra/include-cleaner/lib/Record.cpp
@@ -14,13 +14,16 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Specifiers.h"
 #include "clang/Frontend/CompilerInstance.h"
+#include "clang/Lex/DirectoryLookup.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Tooling/Inclusions/HeaderAnalysis.h"
 #include "clang/Tooling/Inclusions/StandardLibrary.h"
+#include "llvm/ADT/StringRef.h"
 #include <memory>
 #include <utility>
+#include <vector>
 
 namespace clang::include_cleaner {
 namespace {
@@ -28,7 +31,11 @@ namespace {
 class PPRecorder : public PPCallbacks {
 public:
   PPRecorder(RecordedPP &Recorded, const Preprocessor &PP)
-      : Recorded(Recorded), PP(PP), SM(PP.getSourceManager()) {}
+      : Recorded(Recorded), PP(PP), SM(PP.getSourceManager()) {
+    for (const auto &Dir : PP.getHeaderSearchInfo().search_dir_range())
+      if (Dir.getLookupType() == DirectoryLookup::LT_NormalDir)
+        Recorded.Includes.addSearchDirectory(Dir.getDirRef()->getName());
+  }
 
   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
                    SrcMgr::CharacteristicKind FileType,

diff --git a/clang-tools-extra/include-cleaner/lib/Types.cpp b/clang-tools-extra/include-cleaner/lib/Types.cpp
index bcd15920797b46..2061fdc443886c 100644
--- a/clang-tools-extra/include-cleaner/lib/Types.cpp
+++ b/clang-tools-extra/include-cleaner/lib/Types.cpp
@@ -10,8 +10,15 @@
 #include "TypesInternal.h"
 #include "clang/AST/Decl.h"
 #include "clang/Basic/FileEntry.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
+#include <vector>
 
 namespace clang::include_cleaner {
 
@@ -94,16 +101,51 @@ std::string Include::quote() const {
       .str();
 }
 
+static llvm::SmallString<128> normalizePath(llvm::StringRef Path) {
+  namespace path = llvm::sys::path;
+
+  llvm::SmallString<128> P = Path;
+  path::remove_dots(P, /*remove_dot_dot=*/true);
+  path::native(P, path::Style::posix);
+  while (!P.empty() && P.back() == '/')
+    P.pop_back();
+  return P;
+}
+
+void Includes::addSearchDirectory(llvm::StringRef Path) {
+  SearchPath.try_emplace(normalizePath(Path));
+}
+
 void Includes::add(const Include &I) {
+  namespace path = llvm::sys::path;
+
   unsigned Index = All.size();
   All.push_back(I);
   auto BySpellingIt = BySpelling.try_emplace(I.Spelled).first;
   All.back().Spelled = BySpellingIt->first(); // Now we own the backing string.
 
   BySpellingIt->second.push_back(Index);
-  if (I.Resolved)
-    ByFile[&I.Resolved->getFileEntry()].push_back(Index);
   ByLine[I.Line] = Index;
+
+  if (!I.Resolved)
+    return;
+  ByFile[&I.Resolved->getFileEntry()].push_back(Index);
+
+  // While verbatim headers ideally should match #include spelling exactly,
+  // we want to be tolerant of different spellings of the same file.
+  //
+  // If the search path includes "/a/b" and "/a/b/c/d",
+  // verbatim "e/f" should match (spelled=c/d/e/f, resolved=/a/b/c/d/e/f).
+  // We assume entry's (normalized) name will match the search dirs.
+  auto Path = normalizePath(I.Resolved->getName());
+  for (llvm::StringRef Parent = path::parent_path(Path); !Parent.empty();
+       Parent = path::parent_path(Parent)) {
+    if (!SearchPath.contains(Parent))
+      continue;
+    llvm::StringRef Rel =
+        llvm::StringRef(Path).drop_front(Parent.size()).ltrim('/');
+    BySpellingAlternate[Rel].push_back(Index);
+  }
 }
 
 const Include *Includes::atLine(unsigned OneBasedIndex) const {
@@ -122,11 +164,16 @@ llvm::SmallVector<const Include *> Includes::match(Header H) const {
     for (unsigned I : BySpelling.lookup(H.standard().name().trim("<>")))
       Result.push_back(&All[I]);
     break;
-  case Header::Verbatim:
-    for (unsigned I : BySpelling.lookup(H.verbatim().trim("\"<>")))
+  case Header::Verbatim: {
+    llvm::StringRef Spelling = H.verbatim().trim("\"<>");
+    for (unsigned I : BySpelling.lookup(Spelling))
       Result.push_back(&All[I]);
+    for (unsigned I : BySpellingAlternate.lookup(Spelling))
+      if (!llvm::is_contained(Result, &All[I]))
+        Result.push_back(&All[I]);
     break;
   }
+  }
   return Result;
 }
 

diff --git a/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp b/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp
index a2aec2eaf316b9..56b5fbe603adb8 100644
--- a/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp
@@ -8,6 +8,7 @@
 
 #include "clang-include-cleaner/Types.h"
 #include "clang/Basic/FileManager.h"
+#include "clang/Basic/FileSystemOptions.h"
 #include "clang/Tooling/Inclusions/StandardLibrary.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/Support/VirtualFileSystem.h"
@@ -17,6 +18,8 @@
 namespace clang::include_cleaner {
 namespace {
 using testing::ElementsAre;
+using testing::IsEmpty;
+using testing::UnorderedElementsAre;
 
 // Matches an Include* on the specified line;
 MATCHER_P(line, N, "") { return arg->Line == (unsigned)N; }
@@ -44,5 +47,57 @@ TEST(RecordedIncludesTest, Match) {
               ElementsAre(line(4), line(5)));
 }
 
+TEST(RecordedIncludesTest, MatchVerbatim) {
+  auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  FileManager FM(FileSystemOptions{});
+  Includes Inc;
+
+  // By default, a verbatim header only matches includes with the same spelling.
+  auto Foo =
+      FM.getVirtualFileRef("repo/lib/include/rel/foo.h", /*Size=*/0, time_t{});
+  Inc.add(Include{"lib/include/rel/foo.h", Foo, SourceLocation(), 1});
+  Inc.add(Include{"rel/foo.h", Foo, SourceLocation(), 2});
+  EXPECT_THAT(Inc.match(Header("<rel/foo.h>")), ElementsAre(line(2)));
+
+  // A verbatim header can match another spelling if the search path
+  // suggests it's equivalent.
+  auto Bar =
+      FM.getVirtualFileRef("repo/lib/include/rel/bar.h", /*Size=*/0, time_t{});
+  Inc.addSearchDirectory("repo/");
+  Inc.addSearchDirectory("repo/lib/include");
+  Inc.add(Include{"lib/include/rel/bar.h", Bar, SourceLocation(), 3});
+  Inc.add(Include{"rel/bar.h", Bar, SourceLocation(), 4});
+  EXPECT_THAT(Inc.match(Header("<rel/bar.h>")),
+              UnorderedElementsAre(line(3), line(4)));
+
+  // We don't apply this logic to system headers, though.
+  auto Vector =
+      FM.getVirtualFileRef("repo/lib/include/vector", /*Size=*/0, time_t{});
+  Inc.add(Include{"lib/include/vector", Vector, SourceLocation(), 5});
+  EXPECT_THAT(Inc.match(Header(*tooling::stdlib::Header::named("<vector>"))),
+              IsEmpty());
+}
+
+TEST(RecordedIncludesTest, MatchVerbatimMixedAbsoluteRelative) {
+  auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  FS->setCurrentWorkingDirectory("/working");
+  FileManager FM(FileSystemOptions{});
+  Includes Inc;
+
+  auto Foo =
+      FM.getVirtualFileRef("/working/rel1/rel2/foo.h", /*Size=*/0, time_t{});
+  Inc.addSearchDirectory("rel1");
+  Inc.addSearchDirectory("rel1/rel2");
+  Inc.add(Include{"rel2/foo.h", Foo, SourceLocation(), 1});
+  EXPECT_THAT(Inc.match(Header("<foo.h>")), IsEmpty());
+
+  Inc = Includes{};
+  auto Bar = FM.getVirtualFileRef("rel1/rel2/bar.h", /*Size=*/0, time_t{});
+  Inc.addSearchDirectory("/working/rel1");
+  Inc.addSearchDirectory("/working/rel1/rel2");
+  Inc.add(Include{"rel2/bar.h", Bar, SourceLocation(), 1});
+  EXPECT_THAT(Inc.match(Header("<bar.h>")), IsEmpty());
+}
+
 } // namespace
 } // namespace clang::include_cleaner


        


More information about the cfe-commits mailing list