[clang-tools-extra] 64d9713 - [include-cleaner] Unify symlink handling (#102615)

via cfe-commits cfe-commits at lists.llvm.org
Tue Aug 13 03:51:55 PDT 2024


Author: kadir çetinkaya
Date: 2024-08-13T12:51:52+02:00
New Revision: 64d9713637ab98e2b65c9c4317a50ddba0ba0dbc

URL: https://github.com/llvm/llvm-project/commit/64d9713637ab98e2b65c9c4317a50ddba0ba0dbc
DIFF: https://github.com/llvm/llvm-project/commit/64d9713637ab98e2b65c9c4317a50ddba0ba0dbc.diff

LOG: [include-cleaner] Unify symlink handling (#102615)

We were using tryGetRealPathName in certain places, which resolves
symlinks (sometimes). This was resulting in discrepancies in behavior,
depending on how a file was first reached.

This patch migrates all usages of tryGetRealPathName to regular getName
instead.

This implies one backward incompatible change for header-filtering. Our
ignore-header option used to filter against suffixes of absolute paths,
whereas now filter can receive working-directory relative paths in some
cases, possibly breaking existing filters.
Chances of really breaking users are pretty low:
- We'll still filter against absolute paths when header is outside the
  working directory (e.g. /usr/bin/include/some/linux/header.h.)
- Most projects run builds in a working directory that's nested inside
  the repository, hence relative paths still contain all the segments
  relative to repository root and anything else is unlikely to be
  meaningful. e.g. if a header is in
  `$HOME/work/llvm-project/clang-tools-extra/header.h` with builds being
  run in `$HOME/work/llvm-project/build`, we'll still filter against
  `../clang-tools-extra/header.h` which has all the useful segments as a
  suffix.
- This is also a change in how we handle symlinks, but this is aligned
  with what we do in rest of our tools (clangd, tidy checks etc.). We
  tend to not resolve any symlinks for the file.

Added: 
    

Modified: 
    clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
    clang-tools-extra/include-cleaner/lib/Analysis.cpp
    clang-tools-extra/include-cleaner/lib/HTMLReport.cpp
    clang-tools-extra/include-cleaner/lib/Types.cpp
    clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
    clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp
    clang-tools-extra/include-cleaner/unittests/IncludeSpellerTest.cpp
    clang-tools-extra/include-cleaner/unittests/RecordTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
index 7d5c19872d5a85..2888e252267556 100644
--- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
+++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h
@@ -136,8 +136,8 @@ struct Header {
   }
   StringRef verbatim() const { return std::get<Verbatim>(Storage); }
 
-  /// Absolute path for the header when it's a physical file. Otherwise just
-  /// the spelling without surrounding quotes/brackets.
+  /// For phiscal files, either absolute path or path relative to the execution
+  /// root. Otherwise just the spelling without surrounding quotes/brackets.
   llvm::StringRef resolvedPath() const;
 
 private:

diff  --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp
index 68fe79d6929f6b..05e9d14734a95f 100644
--- a/clang-tools-extra/include-cleaner/lib/Analysis.cpp
+++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp
@@ -82,7 +82,7 @@ analyze(llvm::ArrayRef<Decl *> ASTRoots,
         const PragmaIncludes *PI, const Preprocessor &PP,
         llvm::function_ref<bool(llvm::StringRef)> HeaderFilter) {
   auto &SM = PP.getSourceManager();
-  const FileEntry *MainFile = SM.getFileEntryForID(SM.getMainFileID());
+  const auto MainFile = *SM.getFileEntryRefForID(SM.getMainFileID());
   llvm::DenseSet<const Include *> Used;
   llvm::StringSet<> Missing;
   if (!HeaderFilter)
@@ -95,7 +95,7 @@ analyze(llvm::ArrayRef<Decl *> ASTRoots,
              for (const Header &H : Providers) {
                if (H.kind() == Header::Physical &&
                    (H.physical() == MainFile ||
-                    (ResourceDir && H.physical().getDir() == *ResourceDir))) {
+                    H.physical().getDir() == ResourceDir)) {
                  Satisfied = true;
                }
                for (const Include *I : Inc.match(H)) {
@@ -103,29 +103,30 @@ analyze(llvm::ArrayRef<Decl *> ASTRoots,
                  Satisfied = true;
                }
              }
-             if (!Satisfied && !Providers.empty() &&
-                 Ref.RT == RefType::Explicit &&
-                 !HeaderFilter(Providers.front().resolvedPath())) {
-               // Check if we have any headers with the same spelling, in edge
-               // cases like `#include_next "foo.h"`, the user can't ever
-               // include the physical foo.h, but can have a spelling that
-               // refers to it.
-               auto Spelling = spellHeader(
-                   {Providers.front(), PP.getHeaderSearchInfo(), MainFile});
-               for (const Include *I : Inc.match(Header{Spelling})) {
-                 Used.insert(I);
-                 Satisfied = true;
-               }
-               if (!Satisfied)
-                 Missing.insert(std::move(Spelling));
+             // Bail out if we can't (or need not) insert an include.
+             if (Satisfied || Providers.empty() || Ref.RT != RefType::Explicit)
+               return;
+             if (HeaderFilter(Providers.front().resolvedPath()))
+               return;
+             // Check if we have any headers with the same spelling, in edge
+             // cases like `#include_next "foo.h"`, the user can't ever
+             // include the physical foo.h, but can have a spelling that
+             // refers to it.
+             auto Spelling = spellHeader(
+                 {Providers.front(), PP.getHeaderSearchInfo(), MainFile});
+             for (const Include *I : Inc.match(Header{Spelling})) {
+               Used.insert(I);
+               Satisfied = true;
              }
+             if (!Satisfied)
+               Missing.insert(std::move(Spelling));
            });
 
   AnalysisResults Results;
   for (const Include &I : Inc.all()) {
     if (Used.contains(&I) || !I.Resolved ||
-        HeaderFilter(I.Resolved->getFileEntry().tryGetRealPathName()) ||
-        (ResourceDir && I.Resolved->getFileEntry().getDir() == *ResourceDir))
+        HeaderFilter(I.Resolved->getName()) ||
+        I.Resolved->getDir() == ResourceDir)
       continue;
     if (PI) {
       if (PI->shouldKeep(*I.Resolved))
@@ -137,7 +138,7 @@ analyze(llvm::ArrayRef<Decl *> ASTRoots,
         // Since most private -> public mappings happen in a verbatim way, we
         // check textually here. This might go wrong in presence of symlinks or
         // header mappings. But that's not different than rest of the places.
-        if (MainFile->tryGetRealPathName().ends_with(PHeader))
+        if (MainFile.getName().ends_with(PHeader))
           continue;
       }
     }

diff  --git a/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp b/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp
index 195f658a0af920..bbe8bc230c6e20 100644
--- a/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp
+++ b/clang-tools-extra/include-cleaner/lib/HTMLReport.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AnalysisInternal.h"
+#include "clang-include-cleaner/IncludeSpeller.h"
 #include "clang-include-cleaner/Types.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/PrettyPrinter.h"
@@ -167,22 +168,6 @@ class Reporter {
     return "semiused";
   }
 
-  std::string spellHeader(const Header &H) {
-    switch (H.kind()) {
-    case Header::Physical: {
-      bool IsAngled = false;
-      std::string Path = HS.suggestPathToFileForDiagnostics(
-          H.physical(), MainFE->tryGetRealPathName(), &IsAngled);
-      return IsAngled ? "<" + Path + ">" : "\"" + Path + "\"";
-    }
-    case Header::Standard:
-      return H.standard().name().str();
-    case Header::Verbatim:
-      return H.verbatim().str();
-    }
-    llvm_unreachable("Unknown Header kind");
-  }
-
   void fillTarget(Ref &R) {
     // Duplicates logic from walkUsed(), which doesn't expose SymbolLocations.
     for (auto &Loc : locateSymbol(R.Sym))
@@ -204,7 +189,7 @@ class Reporter {
                      R.Includes.end());
 
     if (!R.Headers.empty())
-      R.Insert = spellHeader(R.Headers.front());
+      R.Insert = spellHeader({R.Headers.front(), HS, MainFE});
   }
 
 public:

diff  --git a/clang-tools-extra/include-cleaner/lib/Types.cpp b/clang-tools-extra/include-cleaner/lib/Types.cpp
index cb8a55ed13e5d0..7a637639edf8b5 100644
--- a/clang-tools-extra/include-cleaner/lib/Types.cpp
+++ b/clang-tools-extra/include-cleaner/lib/Types.cpp
@@ -10,7 +10,6 @@
 #include "TypesInternal.h"
 #include "clang/AST/Decl.h"
 #include "clang/Basic/FileEntry.h"
-#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
@@ -48,7 +47,7 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) {
 llvm::StringRef Header::resolvedPath() const {
   switch (kind()) {
   case include_cleaner::Header::Physical:
-    return physical().getFileEntry().tryGetRealPathName();
+    return physical().getName();
   case include_cleaner::Header::Standard:
     return standard().name().trim("<>\"");
   case include_cleaner::Header::Verbatim:

diff  --git a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
index 3bc449b0152bba..d8a44ab9b6e12e 100644
--- a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
+++ b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
@@ -164,7 +164,7 @@ class Action : public clang::ASTFrontendAction {
       writeHTML();
 
     llvm::StringRef Path =
-        SM.getFileEntryForID(SM.getMainFileID())->tryGetRealPathName();
+        SM.getFileEntryRefForID(SM.getMainFileID())->getName();
     assert(!Path.empty() && "Main file path not known?");
     llvm::StringRef Code = SM.getBufferData(SM.getMainFileID());
 

diff  --git a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp
index 5696c380758f85..43634ee8f2d803 100644
--- a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp
@@ -22,9 +22,12 @@
 #include "clang/Testing/TestAST.h"
 #include "clang/Tooling/Inclusions/StandardLibrary.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Testing/Annotations/Annotations.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
@@ -204,21 +207,37 @@ class AnalyzeTest : public testing::Test {
   TestInputs Inputs;
   PragmaIncludes PI;
   RecordedPP PP;
+  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> ExtraFS = nullptr;
+
   AnalyzeTest() {
     Inputs.MakeAction = [this] {
       struct Hook : public SyntaxOnlyAction {
       public:
-        Hook(RecordedPP &PP, PragmaIncludes &PI) : PP(PP), PI(PI) {}
+        Hook(RecordedPP &PP, PragmaIncludes &PI,
+             llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> ExtraFS)
+            : PP(PP), PI(PI), ExtraFS(std::move(ExtraFS)) {}
         bool BeginSourceFileAction(clang::CompilerInstance &CI) override {
           CI.getPreprocessor().addPPCallbacks(PP.record(CI.getPreprocessor()));
           PI.record(CI);
           return true;
         }
 
+        bool BeginInvocation(CompilerInstance &CI) override {
+          if (!ExtraFS)
+            return true;
+          auto OverlayFS =
+              llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
+                  CI.getFileManager().getVirtualFileSystemPtr());
+          OverlayFS->pushOverlay(ExtraFS);
+          CI.getFileManager().setVirtualFileSystem(std::move(OverlayFS));
+          return true;
+        }
+
         RecordedPP &PP;
         PragmaIncludes &PI;
+        llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> ExtraFS;
       };
-      return std::make_unique<Hook>(PP, PI);
+      return std::make_unique<Hook>(PP, PI, ExtraFS);
     };
   }
 };
@@ -322,6 +341,58 @@ TEST_F(AnalyzeTest, DifferentHeaderSameSpelling) {
   EXPECT_THAT(Results.Missing, testing::IsEmpty());
 }
 
+TEST_F(AnalyzeTest, SpellingIncludesWithSymlinks) {
+  llvm::Annotations Code(R"cpp(
+  #include "header.h"
+  void $bar^bar() {
+    $foo^foo();
+  }
+  )cpp");
+  Inputs.Code = Code.code();
+  ExtraFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
+  ExtraFS->addFile("content_for/0", /*ModificationTime=*/{},
+                   llvm::MemoryBuffer::getMemBufferCopy(guard(R"cpp(
+  #include "inner.h"
+  )cpp")));
+  ExtraFS->addSymbolicLink("header.h", "content_for/0",
+                           /*ModificationTime=*/{});
+  ExtraFS->addFile("content_for/1", /*ModificationTime=*/{},
+                   llvm::MemoryBuffer::getMemBufferCopy(guard(R"cpp(
+  void foo();
+  )cpp")));
+  ExtraFS->addSymbolicLink("inner.h", "content_for/1",
+                           /*ModificationTime=*/{});
+
+  TestAST AST(Inputs);
+  std::vector<Decl *> DeclsInTU;
+  for (auto *D : AST.context().getTranslationUnitDecl()->decls())
+    DeclsInTU.push_back(D);
+  auto Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor());
+  // Check that we're spelling header using the symlink, and not underlying
+  // path.
+  EXPECT_THAT(Results.Missing, testing::ElementsAre("\"inner.h\""));
+  // header.h should be unused.
+  EXPECT_THAT(Results.Unused, Not(testing::IsEmpty()));
+
+  {
+    // Make sure filtering is also applied to symlink, not underlying file.
+    auto HeaderFilter = [](llvm::StringRef Path) { return Path == "inner.h"; };
+    Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(),
+                      HeaderFilter);
+    EXPECT_THAT(Results.Missing, testing::ElementsAre("\"inner.h\""));
+    // header.h should be unused.
+    EXPECT_THAT(Results.Unused, Not(testing::IsEmpty()));
+  }
+  {
+    auto HeaderFilter = [](llvm::StringRef Path) { return Path == "header.h"; };
+    Results = analyze(DeclsInTU, {}, PP.Includes, &PI, AST.preprocessor(),
+                      HeaderFilter);
+    // header.h should be ignored now.
+    EXPECT_THAT(Results.Unused, Not(testing::IsEmpty()));
+    EXPECT_THAT(Results.Missing, testing::ElementsAre("\"inner.h\""));
+  }
+}
+
 TEST(FixIncludes, Basic) {
   llvm::StringRef Code = R"cpp(#include "d.h"
 #include "a.h"

diff  --git a/clang-tools-extra/include-cleaner/unittests/IncludeSpellerTest.cpp b/clang-tools-extra/include-cleaner/unittests/IncludeSpellerTest.cpp
index 8f6ad09c46cc4a..a27e83a434372f 100644
--- a/clang-tools-extra/include-cleaner/unittests/IncludeSpellerTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/IncludeSpellerTest.cpp
@@ -47,8 +47,7 @@ class DummyIncludeSpeller : public IncludeSpeller {
       return "<bits/stdc++.h>";
     if (Input.H.kind() != Header::Physical)
       return "";
-    llvm::StringRef AbsolutePath =
-        Input.H.physical().getFileEntry().tryGetRealPathName();
+    llvm::StringRef AbsolutePath = Input.H.resolvedPath();
     std::string RootWithSeparator{testRoot()};
     RootWithSeparator += llvm::sys::path::get_separator();
     if (!AbsolutePath.consume_front(llvm::StringRef{RootWithSeparator}))

diff  --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
index 1a5996e5df284b..bcb9f8982c461a 100644
--- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
@@ -53,9 +53,11 @@ MATCHER_P(named, N, "") {
 }
 
 MATCHER_P(FileNamed, N, "") {
-  if (arg.getFileEntry().tryGetRealPathName() == N)
+  llvm::StringRef ActualName = arg.getName();
+  ActualName.consume_front("./");
+  if (ActualName == N)
     return true;
-  *result_listener << arg.getFileEntry().tryGetRealPathName().str();
+  *result_listener << ActualName.str();
   return false;
 }
 


        


More information about the cfe-commits mailing list