[clang-tools-extra] d97a341 - [clangd] Loose include-cleaner matching for verbatim headers

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Thu Jul 27 10:21:00 PDT 2023


Author: Sam McCall
Date: 2023-07-27T19:20:53+02:00
New Revision: d97a3419c0a35cfa07cb67459846d76ea80c058c

URL: https://github.com/llvm/llvm-project/commit/d97a3419c0a35cfa07cb67459846d76ea80c058c
DIFF: https://github.com/llvm/llvm-project/commit/d97a3419c0a35cfa07cb67459846d76ea80c058c.diff

LOG: [clangd] Loose include-cleaner matching for verbatim headers

This updates clangd to take advantage of the APIs added in D155819.
The main difficulties here are around path normalization.

For layering and performance reasons Includes compares paths lexically, and so
we should have consistent paths that can be compared across addSearchPath() and
add(): symlinks resolved or not, relative or absolute.
This patch spells out that requirement, for most tools consistent use of
FileManager/HeaderSearch is enough.

For clangd this does not work: IncludeStructure doesn't hold FileEntrys due to
the preamble/main-file split. It records paths, but canonicalizes them first.
We choose to use this canonical form as our common representation, so we have
to canonicalize the directory entries too. This is done in preamble-build and
recorded in IncludeStructure, as canonicalization is quite expensive.

Differential Revision: https://reviews.llvm.org/D155878

Added: 
    

Modified: 
    clang-tools-extra/clangd/Headers.cpp
    clang-tools-extra/clangd/Headers.h
    clang-tools-extra/clangd/Hover.cpp
    clang-tools-extra/clangd/IncludeCleaner.cpp
    clang-tools-extra/clangd/IncludeCleaner.h
    clang-tools-extra/clangd/XRefs.cpp
    clang-tools-extra/clangd/unittests/HeadersTests.cpp
    clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/Headers.cpp b/clang-tools-extra/clangd/Headers.cpp
index c3414571170b72..83fba21b1d931a 100644
--- a/clang-tools-extra/clangd/Headers.cpp
+++ b/clang-tools-extra/clangd/Headers.cpp
@@ -12,6 +12,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Frontend/CompilerInstance.h"
+#include "clang/Lex/DirectoryLookup.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Preprocessor.h"
@@ -178,6 +179,17 @@ void IncludeStructure::collect(const CompilerInstance &CI) {
   MainFileEntry = SM.getFileEntryForID(SM.getMainFileID());
   auto Collector = std::make_unique<RecordHeaders>(CI, this);
   CI.getPreprocessor().addPPCallbacks(std::move(Collector));
+
+  // If we're reusing a preamble, don't repopulate SearchPathsCanonical.
+  // The entries will be the same, but canonicalizing to find out is expensive!
+  if (SearchPathsCanonical.empty()) {
+    for (const auto &Dir :
+         CI.getPreprocessor().getHeaderSearchInfo().search_dir_range()) {
+      if (Dir.getLookupType() == DirectoryLookup::LT_NormalDir)
+        SearchPathsCanonical.emplace_back(
+            SM.getFileManager().getCanonicalName(*Dir.getDirRef()));
+    }
+  }
 }
 
 std::optional<IncludeStructure::HeaderID>

diff  --git a/clang-tools-extra/clangd/Headers.h b/clang-tools-extra/clangd/Headers.h
index f6aeab1dbfd352..41cf3de6bba350 100644
--- a/clang-tools-extra/clangd/Headers.h
+++ b/clang-tools-extra/clangd/Headers.h
@@ -173,6 +173,11 @@ class IncludeStructure {
 
   std::vector<Inclusion> MainFileIncludes;
 
+  // The entries of the header search path. (HeaderSearch::search_dir_range())
+  // Only includes the plain-directory entries (not header maps or frameworks).
+  // All paths are canonical (FileManager::getCanonicalPath()).
+  std::vector<std::string> SearchPathsCanonical;
+
   // We reserve HeaderID(0) for the main file and will manually check for that
   // in getID and getOrCreateID because the UniqueID is not stable when the
   // content of the main file changes.

diff  --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp
index ceae563131c13f..aaa160bb048ef5 100644
--- a/clang-tools-extra/clangd/Hover.cpp
+++ b/clang-tools-extra/clangd/Hover.cpp
@@ -1194,8 +1194,7 @@ void maybeAddSymbolProviders(ParsedAST &AST, HoverInfo &HI,
     return;
 
   std::string Result;
-  include_cleaner::Includes ConvertedIncludes =
-      convertIncludes(SM, AST.getIncludeStructure().MainFileIncludes);
+  include_cleaner::Includes ConvertedIncludes = convertIncludes(AST);
   for (const auto &P : RankedProviders) {
     if (P.kind() == include_cleaner::Header::Physical &&
         P.physical() == SM.getFileEntryForID(SM.getMainFileID()))
@@ -1247,9 +1246,7 @@ std::string getSymbolName(include_cleaner::Symbol Sym) {
 
 void maybeAddUsedSymbols(ParsedAST &AST, HoverInfo &HI, const Inclusion &Inc) {
   const SourceManager &SM = AST.getSourceManager();
-  const auto &ConvertedMainFileIncludes =
-      convertIncludes(SM, AST.getIncludeStructure().MainFileIncludes);
-  const auto &HoveredInclude = convertIncludes(SM, llvm::ArrayRef{Inc});
+  auto Converted = convertIncludes(AST);
   llvm::DenseSet<include_cleaner::Symbol> UsedSymbols;
   include_cleaner::walkUsed(
       AST.getLocalTopLevelDecls(), collectMacroReferences(AST),
@@ -1260,12 +1257,8 @@ void maybeAddUsedSymbols(ParsedAST &AST, HoverInfo &HI, const Inclusion &Inc) {
             UsedSymbols.contains(Ref.Target))
           return;
 
-        auto Provider =
-            firstMatchedProvider(ConvertedMainFileIncludes, Providers);
-        if (!Provider || HoveredInclude.match(*Provider).empty())
-          return;
-
-        UsedSymbols.insert(Ref.Target);
+        if (isPreferredProvider(Inc, Converted, Providers))
+          UsedSymbols.insert(Ref.Target);
       });
 
   for (const auto &UsedSymbolDecl : UsedSymbols)

diff  --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp
index 78dff4dd31bc1c..8da42dba78f877 100644
--- a/clang-tools-extra/clangd/IncludeCleaner.cpp
+++ b/clang-tools-extra/clangd/IncludeCleaner.cpp
@@ -26,6 +26,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Format/Format.h"
+#include "clang/Lex/DirectoryLookup.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Tooling/Core/Replacement.h"
@@ -347,11 +348,16 @@ collectMacroReferences(ParsedAST &AST) {
   return Macros;
 }
 
-include_cleaner::Includes
-convertIncludes(const SourceManager &SM,
-                const llvm::ArrayRef<Inclusion> Includes) {
+include_cleaner::Includes convertIncludes(const ParsedAST &AST) {
+  auto &SM = AST.getSourceManager();
+
   include_cleaner::Includes ConvertedIncludes;
-  for (const Inclusion &Inc : Includes) {
+  // We satisfy Includes's contract that search dirs and included files have
+  // matching path styles: both ultimately use FileManager::getCanonicalName().
+  for (const auto &Dir : AST.getIncludeStructure().SearchPathsCanonical)
+    ConvertedIncludes.addSearchDirectory(Dir);
+
+  for (const Inclusion &Inc : AST.getIncludeStructure().MainFileIncludes) {
     include_cleaner::Include TransformedInc;
     llvm::StringRef WrittenRef = llvm::StringRef(Inc.Written);
     TransformedInc.Spelled = WrittenRef.trim("\"<>");
@@ -359,6 +365,8 @@ convertIncludes(const SourceManager &SM,
         SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset);
     TransformedInc.Line = Inc.HashLine + 1;
     TransformedInc.Angled = WrittenRef.starts_with("<");
+    // Inc.Resolved is canonicalized with clangd::getCanonicalPath(),
+    // which is based on FileManager::getCanonicalName(ParentDir).
     auto FE = SM.getFileManager().getFileRef(Inc.Resolved);
     if (!FE) {
       elog("IncludeCleaner: Failed to get an entry for resolved path {0}: {1}",
@@ -376,9 +384,7 @@ IncludeCleanerFindings computeIncludeCleanerFindings(ParsedAST &AST) {
   if (AST.getLangOpts().ObjC)
     return {};
   const auto &SM = AST.getSourceManager();
-  const auto &Includes = AST.getIncludeStructure();
-  include_cleaner::Includes ConvertedIncludes =
-      convertIncludes(SM, Includes.MainFileIncludes);
+  include_cleaner::Includes ConvertedIncludes = convertIncludes(AST);
   const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());
   auto *PreamblePatch = PreamblePatch::getPatchEntry(AST.tuPath(), SM);
 
@@ -401,7 +407,8 @@ IncludeCleanerFindings computeIncludeCleanerFindings(ParsedAST &AST) {
           }
           for (auto *Inc : ConvertedIncludes.match(H)) {
             Satisfied = true;
-            auto HeaderID = Includes.getID(&Inc->Resolved->getFileEntry());
+            auto HeaderID =
+                AST.getIncludeStructure().getID(&Inc->Resolved->getFileEntry());
             assert(HeaderID.has_value() &&
                    "ConvertedIncludes only contains resolved includes.");
             Used.insert(*HeaderID);
@@ -456,6 +463,20 @@ IncludeCleanerFindings computeIncludeCleanerFindings(ParsedAST &AST) {
   return {std::move(UnusedIncludes), std::move(MissingIncludes)};
 }
 
+bool isPreferredProvider(const Inclusion &Inc,
+                         const include_cleaner::Includes &Includes,
+                         llvm::ArrayRef<include_cleaner::Header> Providers) {
+  for (const auto &H : Providers) {
+    auto Matches = Includes.match(H);
+    for (const include_cleaner::Include *Match : Matches)
+      if (Match->Line == unsigned(Inc.HashLine + 1))
+        return true; // this header is (equal) best
+    if (!Matches.empty())
+      return false; // another header is better
+  }
+  return false; // no header provides the symbol
+}
+
 std::vector<Diag>
 issueIncludeCleanerDiagnostics(ParsedAST &AST, llvm::StringRef Code,
                                const IncludeCleanerFindings &Findings,
@@ -494,14 +515,4 @@ issueIncludeCleanerDiagnostics(ParsedAST &AST, llvm::StringRef Code,
   return Result;
 }
 
-std::optional<include_cleaner::Header>
-firstMatchedProvider(const include_cleaner::Includes &Includes,
-                     llvm::ArrayRef<include_cleaner::Header> Providers) {
-  for (const auto &H : Providers) {
-    if (!Includes.match(H).empty())
-      return H;
-  }
-  // No match for this provider in the includes list.
-  return std::nullopt;
-}
 } // namespace clang::clangd

diff  --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h
index b1acbdd5484345..b3ba3a716083e8 100644
--- a/clang-tools-extra/clangd/IncludeCleaner.h
+++ b/clang-tools-extra/clangd/IncludeCleaner.h
@@ -72,17 +72,18 @@ void setIncludeCleanerAnalyzesStdlib(bool B);
 
 /// Converts the clangd include representation to include-cleaner
 /// include representation.
-include_cleaner::Includes
-convertIncludes(const SourceManager &SM,
-                const llvm::ArrayRef<Inclusion> Includes);
+include_cleaner::Includes convertIncludes(const ParsedAST &);
 
 std::vector<include_cleaner::SymbolReference>
 collectMacroReferences(ParsedAST &AST);
 
-/// Find the first provider in the list that is matched by the includes.
-std::optional<include_cleaner::Header>
-firstMatchedProvider(const include_cleaner::Includes &Includes,
-                     llvm::ArrayRef<include_cleaner::Header> Providers);
+/// Whether this #include is considered to provide a particular symbol.
+///
+/// This means it satisfies the reference, and no other #include does better.
+/// `Providers` is the symbol's candidate headers according to walkUsed().
+bool isPreferredProvider(const Inclusion &, const include_cleaner::Includes &,
+                         llvm::ArrayRef<include_cleaner::Header> Providers);
+
 } // namespace clangd
 } // namespace clang
 

diff  --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp
index da1a803daebb0d..bcbd214a725cea 100644
--- a/clang-tools-extra/clangd/XRefs.cpp
+++ b/clang-tools-extra/clangd/XRefs.cpp
@@ -1334,22 +1334,16 @@ maybeFindIncludeReferences(ParsedAST &AST, Position Pos,
   if (IncludeOnLine == Includes.end())
     return std::nullopt;
 
-  const auto &Inc = *IncludeOnLine;
   const SourceManager &SM = AST.getSourceManager();
   ReferencesResult Results;
-  auto ConvertedMainFileIncludes = convertIncludes(SM, Includes);
-  auto ReferencedInclude = convertIncludes(SM, Inc);
+  auto Converted = convertIncludes(AST);
   include_cleaner::walkUsed(
       AST.getLocalTopLevelDecls(), collectMacroReferences(AST),
       AST.getPragmaIncludes().get(), SM,
       [&](const include_cleaner::SymbolReference &Ref,
           llvm::ArrayRef<include_cleaner::Header> Providers) {
-        if (Ref.RT != include_cleaner::RefType::Explicit)
-          return;
-
-        auto Provider =
-            firstMatchedProvider(ConvertedMainFileIncludes, Providers);
-        if (!Provider || ReferencedInclude.match(*Provider).empty())
+        if (Ref.RT != include_cleaner::RefType::Explicit ||
+            !isPreferredProvider(*IncludeOnLine, Converted, Providers))
           return;
 
         auto Loc = SM.getFileLoc(Ref.RefLocation);
@@ -1370,8 +1364,8 @@ maybeFindIncludeReferences(ParsedAST &AST, Position Pos,
 
   // Add the #include line to the references list.
   ReferencesResult::Reference Result;
-  Result.Loc.range =
-      rangeTillEOL(SM.getBufferData(SM.getMainFileID()), Inc.HashOffset);
+  Result.Loc.range = rangeTillEOL(SM.getBufferData(SM.getMainFileID()),
+                                  IncludeOnLine->HashOffset);
   Result.Loc.uri = URIMainFile;
   Results.References.push_back(std::move(Result));
   return Results;

diff  --git a/clang-tools-extra/clangd/unittests/HeadersTests.cpp b/clang-tools-extra/clangd/unittests/HeadersTests.cpp
index 8c0eae6bb3cc8b..3a9576ff74e41f 100644
--- a/clang-tools-extra/clangd/unittests/HeadersTests.cpp
+++ b/clang-tools-extra/clangd/unittests/HeadersTests.cpp
@@ -292,6 +292,15 @@ TEST_F(HeadersTest, IncludeDirective) {
                                    directive(tok::pp_include_next)));
 }
 
+TEST_F(HeadersTest, SearchPath) {
+  FS.Files["foo/bar.h"] = "x";
+  FS.Files["foo/bar/baz.h"] = "y";
+  CDB.ExtraClangFlags.push_back("-Ifoo/bar");
+  CDB.ExtraClangFlags.push_back("-Ifoo/bar/..");
+  EXPECT_THAT(collectIncludes().SearchPathsCanonical,
+              ElementsAre(Subdir, testPath("foo/bar"), testPath("foo")));
+}
+
 TEST_F(HeadersTest, InsertInclude) {
   std::string Path = testPath("sub/bar.h");
   FS.Files[Path] = "";

diff  --git a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
index c55351fb1f91d8..c5d6ecfe4dcf61 100644
--- a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
+++ b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
@@ -19,7 +19,9 @@
 #include "clang/AST/DeclBase.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Tooling/Syntax/Tokens.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Error.h"
@@ -455,46 +457,30 @@ TEST(IncludeCleaner, NoCrash) {
       MainCode.range());
 }
 
-TEST(IncludeCleaner, FirstMatchedProvider) {
-  struct {
-    const char *Code;
-    const std::vector<include_cleaner::Header> Providers;
-    const std::optional<include_cleaner::Header> ExpectedProvider;
-  } Cases[] = {
-      {R"cpp(
-        #include "bar.h"
-        #include "foo.h"
-      )cpp",
-       {include_cleaner::Header{"bar.h"}, include_cleaner::Header{"foo.h"}},
-       include_cleaner::Header{"bar.h"}},
-      {R"cpp(
-        #include "bar.h"
-        #include "foo.h"
-      )cpp",
-       {include_cleaner::Header{"foo.h"}, include_cleaner::Header{"bar.h"}},
-       include_cleaner::Header{"foo.h"}},
-      {"#include \"bar.h\"",
-       {include_cleaner::Header{"bar.h"}},
-       include_cleaner::Header{"bar.h"}},
-      {"#include \"bar.h\"", {include_cleaner::Header{"foo.h"}}, std::nullopt},
-      {"#include \"bar.h\"", {}, std::nullopt}};
-  for (const auto &Case : Cases) {
-    Annotations Code{Case.Code};
-    SCOPED_TRACE(Code.code());
-
-    TestTU TU;
-    TU.Code = Code.code();
-    TU.AdditionalFiles["bar.h"] = "";
-    TU.AdditionalFiles["foo.h"] = "";
-
-    auto AST = TU.build();
-    std::optional<include_cleaner::Header> MatchedProvider =
-        firstMatchedProvider(
-            convertIncludes(AST.getSourceManager(),
-                            AST.getIncludeStructure().MainFileIncludes),
-            Case.Providers);
-    EXPECT_EQ(MatchedProvider, Case.ExpectedProvider);
-  }
+TEST(IncludeCleaner, IsPreferredProvider) {
+  auto TU = TestTU::withCode(R"cpp(
+     #include "decl.h"
+     #include "def.h"
+     #include "def.h"
+  )cpp");
+  TU.AdditionalFiles["decl.h"] = "";
+  TU.AdditionalFiles["def.h"] = "";
+
+  auto AST = TU.build();
+  auto &IncludeDecl = AST.getIncludeStructure().MainFileIncludes[0];
+  auto &IncludeDef1 = AST.getIncludeStructure().MainFileIncludes[1];
+  auto &IncludeDef2 = AST.getIncludeStructure().MainFileIncludes[2];
+
+  auto &FM = AST.getSourceManager().getFileManager();
+  auto *DeclH = &FM.getOptionalFileRef("decl.h")->getFileEntry();
+  auto *DefH = &FM.getOptionalFileRef("def.h")->getFileEntry();
+
+  auto Includes = convertIncludes(AST);
+  std::vector<include_cleaner::Header> Providers = {
+      include_cleaner::Header(DefH), include_cleaner::Header(DeclH)};
+  EXPECT_FALSE(isPreferredProvider(IncludeDecl, Includes, Providers));
+  EXPECT_TRUE(isPreferredProvider(IncludeDef1, Includes, Providers));
+  EXPECT_TRUE(isPreferredProvider(IncludeDef2, Includes, Providers));
 }
 
 TEST(IncludeCleaner, BatchFix) {
@@ -560,6 +546,32 @@ TEST(IncludeCleaner, BatchFix) {
                                     FixMessage("fix all includes")})));
 }
 
+// In the presence of IWYU pragma private, we should accept spellings other
+// than the recommended one if they appear to name the same public header.
+TEST(IncludeCleaner, VerbatimEquivalence) {
+  auto TU = TestTU::withCode(R"cpp(
+    #include "lib/rel/public.h"
+    int x = Public;
+  )cpp");
+  TU.AdditionalFiles["repo/lib/rel/private.h"] = R"cpp(
+    #pragma once
+    // IWYU pragma: private, include "rel/public.h"
+    int Public;
+  )cpp";
+  TU.AdditionalFiles["repo/lib/rel/public.h"] = R"cpp(
+    #pragma once
+    #include "rel/private.h"
+  )cpp";
+
+  TU.ExtraArgs.push_back("-Irepo");
+  TU.ExtraArgs.push_back("-Irepo/lib");
+
+  auto AST = TU.build();
+  auto Findings = computeIncludeCleanerFindings(AST);
+  EXPECT_THAT(Findings.MissingIncludes, IsEmpty());
+  EXPECT_THAT(Findings.UnusedIncludes, IsEmpty());
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang


        


More information about the cfe-commits mailing list