[clang-tools-extra] [clang-include-cleaner] Fix incorrect directory issue for writing files (PR #111375)

Byoungchan Lee via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 10 02:36:34 PDT 2024


https://github.com/bc-lee updated https://github.com/llvm/llvm-project/pull/111375

>From 23b90bba12c010e5882e09e9f6b765a7281324aa Mon Sep 17 00:00:00 2001
From: Byoungchan Lee <byoungchan.lee at gmx.com>
Date: Mon, 7 Oct 2024 22:19:38 +0900
Subject: [PATCH 1/4] [clang-include-cleaner] Fix incorrect directory issue for
 writing files

If the current working directory of `clang-include-cleaner` differs from
the directory of the input files specified in the compilation database,
it doesn't adjust the input file paths properly. As a result,
`clang-include-cleaner` either writes files to the wrong directory or
fails to write files altogether.

This pull request fixes the issue by checking whether the input file
path is absolute. If it isn't, the path is concatenated with
the directory of the input file specified in the compilation database.

Fixes #110843.
---
 .../include-cleaner/tool/IncludeCleaner.cpp   | 24 +++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
index 080099adc9a07a..1cb8b9c4ae99b7 100644
--- a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
+++ b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
@@ -173,11 +173,22 @@ class Action : public clang::ASTFrontendAction {
     if (!HTMLReportPath.empty())
       writeHTML();
 
+    // Source File's path relative to compilation database.
     llvm::StringRef Path =
         SM.getFileEntryRefForID(SM.getMainFileID())->getName();
     assert(!Path.empty() && "Main file path not known?");
     llvm::StringRef Code = SM.getBufferData(SM.getMainFileID());
 
+    auto Cwd = getCompilerInstance()
+                   .getFileManager()
+                   .getVirtualFileSystem()
+                   .getCurrentWorkingDirectory();
+    if (Cwd.getError()) {
+      llvm::errs() << "Failed to get current working directory: "
+                   << Cwd.getError().message() << "\n";
+      return;
+    }
+
     auto Results =
         analyze(AST.Roots, PP.MacroReferences, PP.Includes, &PI,
                 getCompilerInstance().getPreprocessor(), HeaderFilter);
@@ -201,8 +212,17 @@ class Action : public clang::ASTFrontendAction {
       }
     }
 
-    if (!Results.Missing.empty() || !Results.Unused.empty())
-      EditedFiles.try_emplace(Path, Final);
+    if (!Results.Missing.empty() || !Results.Unused.empty()) {
+      auto Sept = llvm::sys::path::get_separator();
+      // Adjust the path to be absolute if it is not.
+      std::string FullPath;
+      if (llvm::sys::path::is_absolute(Path))
+        FullPath = Path.str();
+      else
+        FullPath = Cwd.get() + Sept.str() + Path.str();
+
+      EditedFiles.try_emplace(FullPath, Final);
+    }
   }
 
   void writeHTML() {

>From eef397cf22e062cf024ffcd84fef7bdf98833351 Mon Sep 17 00:00:00 2001
From: Byoungchan Lee <byoungchan.lee at gmx.com>
Date: Wed, 9 Oct 2024 19:43:02 +0900
Subject: [PATCH 2/4] Fix path issue identified by @kadircet

Some Bazel systems might have a read-only file system in the build
directory. In such cases, the output file should be written to
the original source file. Adjusted the logic to find the correct
output directory based on the compilation database and
the current working directory.
---
 .../include-cleaner/tool/IncludeCleaner.cpp   | 95 ++++++++++++++-----
 1 file changed, 73 insertions(+), 22 deletions(-)

diff --git a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
index 1cb8b9c4ae99b7..4481601140e3fe 100644
--- a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
+++ b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
@@ -179,16 +179,6 @@ class Action : public clang::ASTFrontendAction {
     assert(!Path.empty() && "Main file path not known?");
     llvm::StringRef Code = SM.getBufferData(SM.getMainFileID());
 
-    auto Cwd = getCompilerInstance()
-                   .getFileManager()
-                   .getVirtualFileSystem()
-                   .getCurrentWorkingDirectory();
-    if (Cwd.getError()) {
-      llvm::errs() << "Failed to get current working directory: "
-                   << Cwd.getError().message() << "\n";
-      return;
-    }
-
     auto Results =
         analyze(AST.Roots, PP.MacroReferences, PP.Includes, &PI,
                 getCompilerInstance().getPreprocessor(), HeaderFilter);
@@ -212,17 +202,8 @@ class Action : public clang::ASTFrontendAction {
       }
     }
 
-    if (!Results.Missing.empty() || !Results.Unused.empty()) {
-      auto Sept = llvm::sys::path::get_separator();
-      // Adjust the path to be absolute if it is not.
-      std::string FullPath;
-      if (llvm::sys::path::is_absolute(Path))
-        FullPath = Path.str();
-      else
-        FullPath = Cwd.get() + Sept.str() + Path.str();
-
-      EditedFiles.try_emplace(FullPath, Final);
-    }
+    if (!Results.Missing.empty() || !Results.Unused.empty())
+      EditedFiles.try_emplace(Path, Final);
   }
 
   void writeHTML() {
@@ -300,6 +281,42 @@ std::function<bool(llvm::StringRef)> headerFilter() {
   };
 }
 
+llvm::ErrorOr<std::string> getCurrentWorkingDirectory() {
+  llvm::SmallString<256> CurrentPath;
+  if (const std::error_code EC = llvm::sys::fs::current_path(CurrentPath))
+    return llvm::ErrorOr<std::string>(EC);
+  return std::string(CurrentPath.str());
+}
+
+std::optional<std::string>
+adjustCompilationPath(const std::string &Filename, const std::string &Directory,
+                      const std::string &CurrentWorkingDirectory) {
+  if (llvm::sys::path::is_absolute(Filename)) {
+    // If the file path is already absolute, use it as is.
+    return Filename;
+  }
+
+  auto Sept = llvm::sys::path::get_separator().str();
+  // First, try to find the file based on the compilation database.
+  std::string FilePath = Directory + Sept + Filename;
+  // Check if it is writable.
+  if (llvm::sys::fs::access(FilePath, llvm::sys::fs::AccessMode::Write) !=
+      std::error_code()) {
+    // If not, try to find the file based on the current working
+    // directory, as some Bazel invocations may not set the compilation
+    // invocation's filesystem as non-writable. In such cases, we can
+    // find the file based on the current working directory.
+    FilePath =
+        static_cast<std::string>(CurrentWorkingDirectory) + Sept + Filename;
+    if (llvm::sys::fs::access(FilePath, llvm::sys::fs::AccessMode::Write) !=
+        std::error_code()) {
+      llvm::errs() << "Failed to find a writable path for " << Filename << "\n";
+      return std::nullopt;
+    }
+  }
+  return FilePath;
+}
+
 } // namespace
 } // namespace include_cleaner
 } // namespace clang
@@ -325,7 +342,32 @@ int main(int argc, const char **argv) {
     }
   }
 
-  clang::tooling::ClangTool Tool(OptionsParser->getCompilations(),
+  auto &CompilationDatabase = OptionsParser->getCompilations();
+  // Mapping of edited file paths to their actual paths.
+  std::map<std::string, std::string> EditedFilesToActualFiles;
+  if (Edit) {
+    std::vector<clang::tooling::CompileCommand> Commands =
+        CompilationDatabase.getAllCompileCommands();
+    auto CurrentWorkingDirectory = getCurrentWorkingDirectory();
+    // if (CurrentWorkingDirectory.get()
+    if (auto EC = CurrentWorkingDirectory.getError()) {
+      llvm::errs() << "Failed to get current working directory: "
+                   << EC.message() << "\n";
+      return 1;
+    }
+
+    for (const auto &Command : Commands) {
+      auto AdjustedFilePath = adjustCompilationPath(
+          Command.Filename, Command.Directory, CurrentWorkingDirectory.get());
+      if (!AdjustedFilePath.has_value()) {
+        // We already printed an error message.
+        return 1;
+      }
+      EditedFilesToActualFiles[Command.Filename] = AdjustedFilePath.value();
+    }
+  }
+
+  clang::tooling::ClangTool Tool(CompilationDatabase,
                                  OptionsParser->getSourcePathList());
 
   auto HeaderFilter = headerFilter();
@@ -336,6 +378,15 @@ int main(int argc, const char **argv) {
   if (Edit) {
     for (const auto &NameAndContent : Factory.editedFiles()) {
       llvm::StringRef FileName = NameAndContent.first();
+      auto AdjustedFilePathIter = EditedFilesToActualFiles.find(FileName.str());
+      if (AdjustedFilePathIter != EditedFilesToActualFiles.end()) {
+        FileName = AdjustedFilePathIter->second;
+      } else {
+        llvm::errs() << "Failed to find the actual path for " << FileName
+                     << "\n";
+        ++Errors;
+      }
+
       const std::string &FinalCode = NameAndContent.second;
       if (auto Err = llvm::writeToOutput(
               FileName, [&](llvm::raw_ostream &OS) -> llvm::Error {

>From 2409d6f41be2933846b7ac21244cd59463c8e917 Mon Sep 17 00:00:00 2001
From: Byoungchan Lee <byoungchan.lee at gmx.com>
Date: Thu, 10 Oct 2024 18:02:27 +0900
Subject: [PATCH 3/4] Apply code review feedback

- Store the compilation database file as absolute path
- Instead of using `llvm::sys::fs::current_path`,
  use `FileSystem::makeAbsolute` to get the absolute path
  of various files
- Add more comments to explain the intent of the code
---
 .../include-cleaner/tool/IncludeCleaner.cpp   | 152 ++++++++++--------
 1 file changed, 81 insertions(+), 71 deletions(-)

diff --git a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
index 4481601140e3fe..c0363b3d2bdb56 100644
--- a/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
+++ b/clang-tools-extra/include-cleaner/tool/IncludeCleaner.cpp
@@ -173,10 +173,10 @@ class Action : public clang::ASTFrontendAction {
     if (!HTMLReportPath.empty())
       writeHTML();
 
-    // Source File's path relative to compilation database.
-    llvm::StringRef Path =
-        SM.getFileEntryRefForID(SM.getMainFileID())->getName();
-    assert(!Path.empty() && "Main file path not known?");
+    // Source File's path of compiler invocation, converted to absolute path.
+    llvm::SmallString<256> AbsPath(SM.getFileEntryRefForID(SM.getMainFileID())->getName());
+    assert(!AbsPath.empty() && "Main file path not known?");
+    SM.getFileManager().makeAbsolutePath(AbsPath);
     llvm::StringRef Code = SM.getBufferData(SM.getMainFileID());
 
     auto Results =
@@ -186,7 +186,7 @@ class Action : public clang::ASTFrontendAction {
       Results.Missing.clear();
     if (!Remove)
       Results.Unused.clear();
-    std::string Final = fixIncludes(Results, Path, Code, getStyle(Path));
+    std::string Final = fixIncludes(Results, AbsPath, Code, getStyle(AbsPath));
 
     if (Print.getNumOccurrences()) {
       switch (Print) {
@@ -203,7 +203,7 @@ class Action : public clang::ASTFrontendAction {
     }
 
     if (!Results.Missing.empty() || !Results.Unused.empty())
-      EditedFiles.try_emplace(Path, Final);
+      EditedFiles.try_emplace(AbsPath, Final);
   }
 
   void writeHTML() {
@@ -281,42 +281,6 @@ std::function<bool(llvm::StringRef)> headerFilter() {
   };
 }
 
-llvm::ErrorOr<std::string> getCurrentWorkingDirectory() {
-  llvm::SmallString<256> CurrentPath;
-  if (const std::error_code EC = llvm::sys::fs::current_path(CurrentPath))
-    return llvm::ErrorOr<std::string>(EC);
-  return std::string(CurrentPath.str());
-}
-
-std::optional<std::string>
-adjustCompilationPath(const std::string &Filename, const std::string &Directory,
-                      const std::string &CurrentWorkingDirectory) {
-  if (llvm::sys::path::is_absolute(Filename)) {
-    // If the file path is already absolute, use it as is.
-    return Filename;
-  }
-
-  auto Sept = llvm::sys::path::get_separator().str();
-  // First, try to find the file based on the compilation database.
-  std::string FilePath = Directory + Sept + Filename;
-  // Check if it is writable.
-  if (llvm::sys::fs::access(FilePath, llvm::sys::fs::AccessMode::Write) !=
-      std::error_code()) {
-    // If not, try to find the file based on the current working
-    // directory, as some Bazel invocations may not set the compilation
-    // invocation's filesystem as non-writable. In such cases, we can
-    // find the file based on the current working directory.
-    FilePath =
-        static_cast<std::string>(CurrentWorkingDirectory) + Sept + Filename;
-    if (llvm::sys::fs::access(FilePath, llvm::sys::fs::AccessMode::Write) !=
-        std::error_code()) {
-      llvm::errs() << "Failed to find a writable path for " << Filename << "\n";
-      return std::nullopt;
-    }
-  }
-  return FilePath;
-}
-
 } // namespace
 } // namespace include_cleaner
 } // namespace clang
@@ -342,33 +306,84 @@ int main(int argc, const char **argv) {
     }
   }
 
-  auto &CompilationDatabase = OptionsParser->getCompilations();
-  // Mapping of edited file paths to their actual paths.
-  std::map<std::string, std::string> EditedFilesToActualFiles;
+  auto VFS = llvm::vfs::getRealFileSystem();
+  auto &CDB = OptionsParser->getCompilations();
+  // CDBToAbsPaths is a map from the path in the compilation database to the
+  // writable absolute path of the file.
+  std::map<std::string, std::string> CDBToAbsPaths;
   if (Edit) {
-    std::vector<clang::tooling::CompileCommand> Commands =
-        CompilationDatabase.getAllCompileCommands();
-    auto CurrentWorkingDirectory = getCurrentWorkingDirectory();
-    // if (CurrentWorkingDirectory.get()
-    if (auto EC = CurrentWorkingDirectory.getError()) {
-      llvm::errs() << "Failed to get current working directory: "
-                   << EC.message() << "\n";
-      return 1;
-    }
-
-    for (const auto &Command : Commands) {
-      auto AdjustedFilePath = adjustCompilationPath(
-          Command.Filename, Command.Directory, CurrentWorkingDirectory.get());
-      if (!AdjustedFilePath.has_value()) {
-        // We already printed an error message.
+    // if Edit is enabled, `Factory.editedFiles()` will contain the final code,
+    // along with the path given in the compilation database. That path can be
+    // absolute or relative, and if it is relative, it is relative to the
+    // "Directory" field in the compilation database. We need to make it
+    // absolute to write the final code to the correct path.
+    // There are several cases to consider:
+    // 1. The "Directory" field isn't same as the current working directory.
+    // 2. The file path resolved from the "Directory" field is not writable.
+    // For these cases, we need to find a writable path for the file.
+    // To effectively handle these cases, we only need to consider
+    // the files from `getSourcePathList()` that are present in the compilation
+    // database.
+    for (auto &Source : OptionsParser->getSourcePathList()) {
+      llvm::SmallString<256> AbsPath(Source);
+      if (auto Err = VFS->makeAbsolute(AbsPath)) {
+        llvm::errs() << "Failed to get absolute path for " << Source << " : "
+                     << Err.message() << '\n';
         return 1;
       }
-      EditedFilesToActualFiles[Command.Filename] = AdjustedFilePath.value();
+      std::vector<clang::tooling::CompileCommand> Cmds =
+          CDB.getCompileCommands(AbsPath);
+      if (Cmds.empty()) {
+        // Try with the original path.
+        Cmds = CDB.getCompileCommands(Source);
+        if (Cmds.empty()) {
+          continue;
+        }
+      }
+      // We only need the first command to get the directory.
+      auto Cmd = Cmds[0];
+      llvm::SmallString<256> CDBPath(Cmd.Filename);
+      std::string Directory(Cmd.Directory);
+
+      if (llvm::sys::path::is_absolute(CDBPath)) {
+        // If the path in the compilation database is already absolute, we don't
+        // need to do anything.
+        CDBToAbsPaths[static_cast<std::string>(CDBPath)] =
+            static_cast<std::string>(AbsPath);
+      } else {
+        auto Sept = llvm::sys::path::get_separator();
+        // First, try to find the file based on the compilation database.
+        llvm::Twine FilePathTwine = Directory + Sept + CDBPath;
+        llvm::SmallString<256> FilePath;
+        FilePathTwine.toVector(FilePath);
+        // Check if it is writable.
+        if (llvm::sys::fs::access(FilePath, llvm::sys::fs::AccessMode::Write) !=
+            std::error_code()) {
+          // If not, try to find the file based on the current working
+          // directory, as some Bazel invocations may not set the compilation
+          // invocation's filesystem as non-writable. In such cases, we can
+          // find the file based on the current working directory.
+          FilePath = Source;
+          if (auto EC = VFS->makeAbsolute(CDBPath)) {
+            llvm::errs() << "Failed to get absolute path for " << CDBPath
+                         << " : " << EC.message() << '\n';
+            return 1;
+          }
+          if (llvm::sys::fs::access(FilePath,
+                                    llvm::sys::fs::AccessMode::Write) !=
+              std::error_code()) {
+            llvm::errs() << "Failed to find a writable path for " << Source
+                         << '\n';
+            return 1;
+          }
+        }
+        CDBToAbsPaths[static_cast<std::string>(CDBPath)] =
+            static_cast<std::string>(FilePath);
+      }
     }
   }
 
-  clang::tooling::ClangTool Tool(CompilationDatabase,
-                                 OptionsParser->getSourcePathList());
+  clang::tooling::ClangTool Tool(CDB, OptionsParser->getSourcePathList());
 
   auto HeaderFilter = headerFilter();
   if (!HeaderFilter)
@@ -378,14 +393,9 @@ int main(int argc, const char **argv) {
   if (Edit) {
     for (const auto &NameAndContent : Factory.editedFiles()) {
       llvm::StringRef FileName = NameAndContent.first();
-      auto AdjustedFilePathIter = EditedFilesToActualFiles.find(FileName.str());
-      if (AdjustedFilePathIter != EditedFilesToActualFiles.end()) {
-        FileName = AdjustedFilePathIter->second;
-      } else {
-        llvm::errs() << "Failed to find the actual path for " << FileName
-                     << "\n";
-        ++Errors;
-      }
+      if (auto It = CDBToAbsPaths.find(FileName.str());
+          It != CDBToAbsPaths.end())
+        FileName = It->second;
 
       const std::string &FinalCode = NameAndContent.second;
       if (auto Err = llvm::writeToOutput(

>From 0c9477d3d7f2eb14f221a0a36cbd9dfd75015ef1 Mon Sep 17 00:00:00 2001
From: Byoungchan Lee <byoungchan.lee at gmx.com>
Date: Thu, 10 Oct 2024 18:17:29 +0900
Subject: [PATCH 4/4] Add test cases where the cwd and the compilation
 database's directory mismatch

---
 clang-tools-extra/include-cleaner/test/tool.cpp | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/clang-tools-extra/include-cleaner/test/tool.cpp b/clang-tools-extra/include-cleaner/test/tool.cpp
index 2155eec189d186..768fc4f4e583fb 100644
--- a/clang-tools-extra/include-cleaner/test/tool.cpp
+++ b/clang-tools-extra/include-cleaner/test/tool.cpp
@@ -38,13 +38,12 @@ int x = foo();
 //      PRINT: #include "foo.h"
 //  PRINT-NOT: {{^}}#include "foobar.h"{{$}}
 
+//        RUN: mkdir -p $(dirname %t)/out
 //        RUN: cp %s %t.cpp
-//        RUN: clang-include-cleaner -edit %t.cpp -- -I%S/Inputs/
-//        RUN: FileCheck --match-full-lines --check-prefix=EDIT %s < %t.cpp
-//       EDIT: #include "foo.h"
-//   EDIT-NOT: {{^}}#include "foobar.h"{{$}}
-
-//        RUN: cp %s %t.cpp
-//        RUN: clang-include-cleaner -edit --ignore-headers="foobar\.h,foo\.h" %t.cpp -- -I%S/Inputs/ 
-//        RUN: FileCheck --match-full-lines --check-prefix=EDIT2 %s < %t.cpp
-//  EDIT2-NOT: {{^}}#include "foo.h"{{$}}
+//        RUN: echo "[{\"directory\":\"$(dirname %t)/out\",\"file\":\"../$(basename %t).cpp\",\"command\":\":clang++ -I%S/Inputs/ ../$(basename %t).cpp\"}]" > $(dirname %t)/out/compile_commands.json
+//       RUN: pushd $(dirname %t)
+//       RUN: clang-include-cleaner -p out -edit $(basename %t).cpp
+//       RUN: popd
+//       RUN: FileCheck --match-full-lines --check-prefix=EDIT3 %s < %t.cpp
+//     EDIT3: #include "foo.h"
+// EDIT3-NOT: {{^}}#include "foobar.h"{{$}}



More information about the cfe-commits mailing list