[clang-tools-extra] 34cc210 - [clangd] IncludeCleaner: Attribute symbols from non self-contained headers to their parents

Kirill Bobyrev via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 26 07:21:05 PST 2021


Author: Kirill Bobyrev
Date: 2021-11-26T16:20:48+01:00
New Revision: 34cc210aa8af2fd33598e5559d0f5b51f9423dd6

URL: https://github.com/llvm/llvm-project/commit/34cc210aa8af2fd33598e5559d0f5b51f9423dd6
DIFF: https://github.com/llvm/llvm-project/commit/34cc210aa8af2fd33598e5559d0f5b51f9423dd6.diff

LOG: [clangd] IncludeCleaner: Attribute symbols from non self-contained headers to their parents

When a symbol comes from the non self-contained header, we recursively uplift
the file we consider used to the first includer that has a header guard. We
need to do this while we still have FileIDs because every time a non
self-contained header is included, it gets a new FileID but is later
deduplicated by HeaderID and it's not possible to understand where it was
included from.

Based on D114370.

Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D114623

Added: 
    

Modified: 
    clang-tools-extra/clangd/IncludeCleaner.cpp
    clang-tools-extra/clangd/IncludeCleaner.h
    clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp
index a0b9e303e3bfe..33964d9f83ce0 100644
--- a/clang-tools-extra/clangd/IncludeCleaner.cpp
+++ b/clang-tools-extra/clangd/IncludeCleaner.cpp
@@ -8,6 +8,7 @@
 
 #include "IncludeCleaner.h"
 #include "Config.h"
+#include "Headers.h"
 #include "ParsedAST.h"
 #include "Protocol.h"
 #include "SourceCode.h"
@@ -16,6 +17,7 @@
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Tooling/Syntax/Tokens.h"
@@ -221,6 +223,31 @@ bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST) {
   return true;
 }
 
+// In case symbols are coming from non self-contained header, we need to find
+// its first includer that is self-contained. This is the header users can
+// include, so it will be responsible for bringing the symbols from given
+// header into the scope.
+FileID headerResponsible(FileID ID, const SourceManager &SM,
+                         const IncludeStructure &Includes) {
+  // Unroll the chain of non self-contained headers until we find the one that
+  // can be included.
+  for (const FileEntry *FE = SM.getFileEntryForID(ID); ID != SM.getMainFileID();
+       FE = SM.getFileEntryForID(ID)) {
+    // If FE is nullptr, we consider it to be the responsible header.
+    if (!FE)
+      break;
+    auto HID = Includes.getID(FE);
+    assert(HID && "We're iterating over headers already existing in "
+                  "IncludeStructure");
+    if (Includes.isSelfContained(*HID))
+      break;
+    // The header is not self-contained: put the responsibility for its symbols
+    // on its includer.
+    ID = SM.getFileID(SM.getIncludeLoc(ID));
+  }
+  return ID;
+}
+
 } // namespace
 
 ReferencedLocations findReferencedLocations(ParsedAST &AST) {
@@ -234,20 +261,27 @@ ReferencedLocations findReferencedLocations(ParsedAST &AST) {
 
 llvm::DenseSet<FileID>
 findReferencedFiles(const llvm::DenseSet<SourceLocation> &Locs,
-                    const SourceManager &SM) {
+                    const IncludeStructure &Includes, const SourceManager &SM) {
   std::vector<SourceLocation> Sorted{Locs.begin(), Locs.end()};
   llvm::sort(Sorted); // Group by FileID.
-  ReferencedFiles Result(SM);
+  ReferencedFiles Files(SM);
   for (auto It = Sorted.begin(); It < Sorted.end();) {
     FileID FID = SM.getFileID(*It);
-    Result.add(FID, *It);
+    Files.add(FID, *It);
     // Cheaply skip over all the other locations from the same FileID.
     // This avoids lots of redundant Loc->File lookups for the same file.
     do
       ++It;
     while (It != Sorted.end() && SM.isInFileID(*It, FID));
   }
-  return std::move(Result.Files);
+  // If a header is not self-contained, we consider its symbols a logical part
+  // of the including file. Therefore, mark the parents of all used
+  // non-self-contained FileIDs as used. Perform this on FileIDs rather than
+  // HeaderIDs, as each inclusion of a non-self-contained file is distinct.
+  llvm::DenseSet<FileID> Result;
+  for (FileID ID : Files.Files)
+    Result.insert(headerResponsible(ID, SM, Includes));
+  return Result;
 }
 
 std::vector<const Inclusion *>
@@ -304,12 +338,8 @@ std::vector<const Inclusion *> computeUnusedIncludes(ParsedAST &AST) {
   const auto &SM = AST.getSourceManager();
 
   auto Refs = findReferencedLocations(AST);
-  // FIXME(kirillbobyrev): Attribute the symbols from non self-contained
-  // headers to their parents while the information about respective
-  // SourceLocations and FileIDs is not lost. It is not possible to do that
-  // later when non-guarded headers included multiple times will get same
-  // HeaderID.
-  auto ReferencedFileIDs = findReferencedFiles(Refs, SM);
+  auto ReferencedFileIDs = findReferencedFiles(Refs, AST.getIncludeStructure(),
+                                               AST.getSourceManager());
   auto ReferencedHeaders =
       translateToHeaderIDs(ReferencedFileIDs, AST.getIncludeStructure(), SM);
   return getUnused(AST, ReferencedHeaders);

diff  --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h
index a381e40cb2bb9..59756faea8e78 100644
--- a/clang-tools-extra/clangd/IncludeCleaner.h
+++ b/clang-tools-extra/clangd/IncludeCleaner.h
@@ -52,6 +52,7 @@ ReferencedLocations findReferencedLocations(ParsedAST &AST);
 /// The output only includes things SourceManager sees as files (not macro IDs).
 /// This can include <built-in>, <scratch space> etc that are not true files.
 llvm::DenseSet<FileID> findReferencedFiles(const ReferencedLocations &Locs,
+                                           const IncludeStructure &Includes,
                                            const SourceManager &SM);
 
 /// Maps FileIDs to the internal IncludeStructure representation (HeaderIDs).

diff  --git a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
index f25f7388df66a..f72791da4030a 100644
--- a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
+++ b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp
@@ -281,7 +281,8 @@ TEST(IncludeCleaner, VirtualBuffers) {
   auto &SM = AST.getSourceManager();
   auto &Includes = AST.getIncludeStructure();
 
-  auto ReferencedFiles = findReferencedFiles(findReferencedLocations(AST), SM);
+  auto ReferencedFiles =
+      findReferencedFiles(findReferencedLocations(AST), Includes, SM);
   llvm::StringSet<> ReferencedFileNames;
   for (FileID FID : ReferencedFiles)
     ReferencedFileNames.insert(
@@ -303,6 +304,80 @@ TEST(IncludeCleaner, VirtualBuffers) {
   EXPECT_THAT(getUnused(AST, ReferencedHeaders), ::testing::IsEmpty());
 }
 
+TEST(IncludeCleaner, DistinctUnguardedInclusions) {
+  TestTU TU;
+  TU.Code = R"cpp(
+    #include "bar.h"
+    #include "foo.h"
+
+    int LocalFoo = foo::Variable;
+    )cpp";
+  TU.AdditionalFiles["foo.h"] = R"cpp(
+    #pragma once
+    namespace foo {
+    #include "unguarded.h"
+    }
+    )cpp";
+  TU.AdditionalFiles["bar.h"] = R"cpp(
+    #pragma once
+    namespace bar {
+    #include "unguarded.h"
+    }
+    )cpp";
+  TU.AdditionalFiles["unguarded.h"] = R"cpp(
+    constexpr int Variable = 42;
+    )cpp";
+
+  ParsedAST AST = TU.build();
+
+  auto ReferencedFiles =
+      findReferencedFiles(findReferencedLocations(AST),
+                          AST.getIncludeStructure(), AST.getSourceManager());
+  llvm::StringSet<> ReferencedFileNames;
+  auto &SM = AST.getSourceManager();
+  for (FileID FID : ReferencedFiles)
+    ReferencedFileNames.insert(
+        SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename());
+  // Note that we have uplifted the referenced files from non self-contained
+  // headers to header-guarded ones.
+  EXPECT_THAT(ReferencedFileNames.keys(),
+              UnorderedElementsAre(testPath("foo.h")));
+}
+
+TEST(IncludeCleaner, NonSelfContainedHeaders) {
+  TestTU TU;
+  TU.Code = R"cpp(
+    #include "foo.h"
+
+    int LocalFoo = Variable;
+    )cpp";
+  TU.AdditionalFiles["foo.h"] = R"cpp(
+    #pragma once
+    #include "indirection.h"
+    )cpp";
+  TU.AdditionalFiles["indirection.h"] = R"cpp(
+    #include "unguarded.h"
+    )cpp";
+  TU.AdditionalFiles["unguarded.h"] = R"cpp(
+    constexpr int Variable = 42;
+    )cpp";
+
+  ParsedAST AST = TU.build();
+
+  auto ReferencedFiles =
+      findReferencedFiles(findReferencedLocations(AST),
+                          AST.getIncludeStructure(), AST.getSourceManager());
+  llvm::StringSet<> ReferencedFileNames;
+  auto &SM = AST.getSourceManager();
+  for (FileID FID : ReferencedFiles)
+    ReferencedFileNames.insert(
+        SM.getPresumedLoc(SM.getLocForStartOfFile(FID)).getFilename());
+  // Note that we have uplifted the referenced files from non self-contained
+  // headers to header-guarded ones.
+  EXPECT_THAT(ReferencedFileNames.keys(),
+              UnorderedElementsAre(testPath("foo.h")));
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang


        


More information about the cfe-commits mailing list