[clang] [DependencyScanningFilesystem] Make sure the local/shared cache filename lookups use only absolute paths (PR #66122)

Argyrios Kyrtzidis via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 19 13:34:52 PDT 2023


https://github.com/akyrtzi updated https://github.com/llvm/llvm-project/pull/66122

>From 26b0440d81f4bbf8e666c1c11e200963fa2cddb4 Mon Sep 17 00:00:00 2001
From: Argyrios Kyrtzidis <kyrtzidis at apple.com>
Date: Tue, 12 Sep 2023 11:26:46 -0700
Subject: [PATCH 1/2] [DependencyScanningFilesystem] Make sure the local/shared
 cache filename lookups use only absolute paths

Previously a relative path would be used as a key for cache lookup and if the same relative path
was used from another compiler invocation with a different working directory then the first cache entry
was erroneously returned.
---
 .../DependencyScanningFilesystem.h            | 18 ++++-
 .../DependencyScanningFilesystem.cpp          | 79 +++++++++++++++----
 clang/test/ClangScanDeps/relative-filenames.c | 38 +++++++++
 3 files changed, 117 insertions(+), 18 deletions(-)
 create mode 100644 clang/test/ClangScanDeps/relative-filenames.c

diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index 4b4e3c7eb2ecd06..dbe219b6dd8d723 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -215,6 +215,7 @@ class DependencyScanningFilesystemLocalCache {
 public:
   /// Returns entry associated with the filename or nullptr if none is found.
   const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
+    assert(llvm::sys::path::is_absolute_gnu(Filename));
     auto It = Cache.find(Filename);
     return It == Cache.end() ? nullptr : It->getValue();
   }
@@ -224,6 +225,7 @@ class DependencyScanningFilesystemLocalCache {
   const CachedFileSystemEntry &
   insertEntryForFilename(StringRef Filename,
                          const CachedFileSystemEntry &Entry) {
+    assert(llvm::sys::path::is_absolute_gnu(Filename));
     const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second;
     assert(InsertedEntry == &Entry && "entry already present");
     return *InsertedEntry;
@@ -282,13 +284,14 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
 public:
   DependencyScanningWorkerFilesystem(
       DependencyScanningFilesystemSharedCache &SharedCache,
-      IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
-      : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache) {}
+      IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS);
 
   llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
   llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
   openFileForRead(const Twine &Path) override;
 
+  std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
+
   /// Returns entry for the given filename.
   ///
   /// Attempts to use the local and shared caches first, then falls back to
@@ -304,8 +307,11 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
   /// For a filename that's not yet associated with any entry in the caches,
   /// uses the underlying filesystem to either look up the entry based in the
   /// shared cache indexed by unique ID, or creates new entry from scratch.
+  /// \p FilenameForLookup will always be an absolute path, and different than
+  /// \p OriginalFilename if \p OriginalFilename is relative.
   llvm::ErrorOr<const CachedFileSystemEntry &>
-  computeAndStoreResult(StringRef Filename);
+  computeAndStoreResult(StringRef OriginalFilename,
+                        StringRef FilenameForLookup);
 
   /// Scan for preprocessor directives for the given entry if necessary and
   /// returns a wrapper object with reference semantics.
@@ -388,6 +394,12 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
   /// The local cache is used by the worker thread to cache file system queries
   /// locally instead of querying the global cache every time.
   DependencyScanningFilesystemLocalCache LocalCache;
+
+  /// The working directory to use for making relative paths absolute before
+  /// using them for cache lookups.
+  llvm::ErrorOr<std::string> WorkingDirForCacheLookup;
+
+  void updateWorkingDirForCacheLookup();
 };
 
 } // end namespace dependencies
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 31404855e3b1dc3..3e53c8fc5740875 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -96,6 +96,7 @@ DependencyScanningFilesystemSharedCache::
 DependencyScanningFilesystemSharedCache::CacheShard &
 DependencyScanningFilesystemSharedCache::getShardForFilename(
     StringRef Filename) const {
+  assert(llvm::sys::path::is_absolute_gnu(Filename));
   return CacheShards[llvm::hash_value(Filename) % NumShards];
 }
 
@@ -109,6 +110,7 @@ DependencyScanningFilesystemSharedCache::getShardForUID(
 const CachedFileSystemEntry *
 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
     StringRef Filename) const {
+  assert(llvm::sys::path::is_absolute_gnu(Filename));
   std::lock_guard<std::mutex> LockGuard(CacheLock);
   auto It = EntriesByFilename.find(Filename);
   return It == EntriesByFilename.end() ? nullptr : It->getValue();
@@ -189,6 +191,14 @@ static bool shouldCacheStatFailures(StringRef Filename) {
   return shouldScanForDirectivesBasedOnExtension(Filename);
 }
 
+DependencyScanningWorkerFilesystem::DependencyScanningWorkerFilesystem(
+    DependencyScanningFilesystemSharedCache &SharedCache,
+    IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS)
+    : ProxyFileSystem(std::move(FS)), SharedCache(SharedCache),
+      WorkingDirForCacheLookup(llvm::errc::invalid_argument) {
+  updateWorkingDirForCacheLookup();
+}
+
 bool DependencyScanningWorkerFilesystem::shouldScanForDirectives(
     StringRef Filename) {
   return shouldScanForDirectivesBasedOnExtension(Filename);
@@ -215,44 +225,62 @@ DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
 }
 
 llvm::ErrorOr<const CachedFileSystemEntry &>
-DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
-  llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);
+DependencyScanningWorkerFilesystem::computeAndStoreResult(
+    StringRef OriginalFilename, StringRef FilenameForLookup) {
+  llvm::ErrorOr<llvm::vfs::Status> Stat =
+      getUnderlyingFS().status(OriginalFilename);
   if (!Stat) {
-    if (!shouldCacheStatFailures(Filename))
+    if (!shouldCacheStatFailures(OriginalFilename))
       return Stat.getError();
     const auto &Entry =
-        getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
-    return insertLocalEntryForFilename(Filename, Entry);
+        getOrEmplaceSharedEntryForFilename(FilenameForLookup, Stat.getError());
+    return insertLocalEntryForFilename(FilenameForLookup, Entry);
   }
 
   if (const auto *Entry = findSharedEntryByUID(*Stat))
-    return insertLocalEntryForFilename(Filename, *Entry);
+    return insertLocalEntryForFilename(FilenameForLookup, *Entry);
 
   auto TEntry =
-      Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename);
+      Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(OriginalFilename);
 
   const CachedFileSystemEntry *SharedEntry = [&]() {
     if (TEntry) {
       const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
-      return &getOrInsertSharedEntryForFilename(Filename, UIDEntry);
+      return &getOrInsertSharedEntryForFilename(FilenameForLookup, UIDEntry);
     }
-    return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError());
+    return &getOrEmplaceSharedEntryForFilename(FilenameForLookup,
+                                               TEntry.getError());
   }();
 
-  return insertLocalEntryForFilename(Filename, *SharedEntry);
+  return insertLocalEntryForFilename(FilenameForLookup, *SharedEntry);
 }
 
 llvm::ErrorOr<EntryRef>
 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
-    StringRef Filename, bool DisableDirectivesScanning) {
-  if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
-    return scanForDirectivesIfNecessary(*Entry, Filename,
+    StringRef OriginalFilename, bool DisableDirectivesScanning) {
+  StringRef FilenameForLookup;
+  SmallString<256> PathBuf;
+  if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) {
+    FilenameForLookup = OriginalFilename;
+  } else if (!WorkingDirForCacheLookup) {
+    return WorkingDirForCacheLookup.getError();
+  } else {
+    StringRef RelFilename = OriginalFilename;
+    RelFilename.consume_front("./");
+    PathBuf = *WorkingDirForCacheLookup;
+    llvm::sys::path::append(PathBuf, RelFilename);
+    FilenameForLookup = PathBuf.str();
+  }
+  assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup));
+  if (const auto *Entry =
+          findEntryByFilenameWithWriteThrough(FilenameForLookup))
+    return scanForDirectivesIfNecessary(*Entry, OriginalFilename,
                                         DisableDirectivesScanning)
         .unwrapError();
-  auto MaybeEntry = computeAndStoreResult(Filename);
+  auto MaybeEntry = computeAndStoreResult(OriginalFilename, FilenameForLookup);
   if (!MaybeEntry)
     return MaybeEntry.getError();
-  return scanForDirectivesIfNecessary(*MaybeEntry, Filename,
+  return scanForDirectivesIfNecessary(*MaybeEntry, OriginalFilename,
                                       DisableDirectivesScanning)
       .unwrapError();
 }
@@ -330,3 +358,24 @@ DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
     return Result.getError();
   return DepScanFile::create(Result.get());
 }
+
+std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory(
+    const Twine &Path) {
+  std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path);
+  updateWorkingDirForCacheLookup();
+  return EC;
+}
+
+void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() {
+  llvm::ErrorOr<std::string> CWD =
+      getUnderlyingFS().getCurrentWorkingDirectory();
+  if (!CWD) {
+    WorkingDirForCacheLookup = CWD.getError();
+  } else if (!llvm::sys::path::is_absolute_gnu(*CWD)) {
+    WorkingDirForCacheLookup = llvm::errc::invalid_argument;
+  } else {
+    WorkingDirForCacheLookup = *CWD;
+  }
+  assert(!WorkingDirForCacheLookup ||
+         llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup));
+}
diff --git a/clang/test/ClangScanDeps/relative-filenames.c b/clang/test/ClangScanDeps/relative-filenames.c
new file mode 100644
index 000000000000000..03f2be7ec4c1f6a
--- /dev/null
+++ b/clang/test/ClangScanDeps/relative-filenames.c
@@ -0,0 +1,38 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json
+
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -format make -j 1 > %t/result.txt
+// RUN: FileCheck %s -input-file=%t/result.txt
+
+// CHECK: {{/|\\}}dir1{{/|\\}}t1.c
+// CHECK: {{/|\\}}dir1{{/|\\}}head.h
+// CHECK: {{/|\\}}dir2{{/|\\}}t2.c
+// CHECK: {{/|\\}}dir2{{/|\\}}head.h
+
+//--- cdb.json.template
+[
+  {
+    "directory": "DIR/dir1",
+    "command": "clang -fsyntax-only t1.c",
+    "file": "t1.c"
+  },
+  {
+    "directory": "DIR/dir2",
+    "command": "clang -fsyntax-only t2.c",
+    "file": "t2.c"
+  }
+]
+
+//--- dir1/t1.c
+#include "head.h"
+
+//--- dir1/head.h
+#ifndef BBB
+#define BBB
+#endif
+
+//--- dir2/t2.c
+#include "head.h"
+
+//--- dir2/head.h

>From 5901b47a4f77c22b4bd1122af1ba528da095533c Mon Sep 17 00:00:00 2001
From: Argyrios Kyrtzidis <kyrtzidis at apple.com>
Date: Tue, 19 Sep 2023 13:34:29 -0700
Subject: [PATCH 2/2] Use `is_absolute` instead of `is_absolute_gnu`

---
 .../DependencyScanningFilesystem.h                   |  4 ++--
 .../DependencyScanningFilesystem.cpp                 | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index dbe219b6dd8d723..55e7c196b117039 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -215,7 +215,7 @@ class DependencyScanningFilesystemLocalCache {
 public:
   /// Returns entry associated with the filename or nullptr if none is found.
   const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
-    assert(llvm::sys::path::is_absolute_gnu(Filename));
+    assert(llvm::sys::path::is_absolute(Filename));
     auto It = Cache.find(Filename);
     return It == Cache.end() ? nullptr : It->getValue();
   }
@@ -225,7 +225,7 @@ class DependencyScanningFilesystemLocalCache {
   const CachedFileSystemEntry &
   insertEntryForFilename(StringRef Filename,
                          const CachedFileSystemEntry &Entry) {
-    assert(llvm::sys::path::is_absolute_gnu(Filename));
+    assert(llvm::sys::path::is_absolute(Filename));
     const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second;
     assert(InsertedEntry == &Entry && "entry already present");
     return *InsertedEntry;
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 3e53c8fc5740875..7b859095174c10c 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -96,7 +96,7 @@ DependencyScanningFilesystemSharedCache::
 DependencyScanningFilesystemSharedCache::CacheShard &
 DependencyScanningFilesystemSharedCache::getShardForFilename(
     StringRef Filename) const {
-  assert(llvm::sys::path::is_absolute_gnu(Filename));
+  assert(llvm::sys::path::is_absolute(Filename));
   return CacheShards[llvm::hash_value(Filename) % NumShards];
 }
 
@@ -110,7 +110,7 @@ DependencyScanningFilesystemSharedCache::getShardForUID(
 const CachedFileSystemEntry *
 DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
     StringRef Filename) const {
-  assert(llvm::sys::path::is_absolute_gnu(Filename));
+  assert(llvm::sys::path::is_absolute(Filename));
   std::lock_guard<std::mutex> LockGuard(CacheLock);
   auto It = EntriesByFilename.find(Filename);
   return It == EntriesByFilename.end() ? nullptr : It->getValue();
@@ -260,7 +260,7 @@ DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
     StringRef OriginalFilename, bool DisableDirectivesScanning) {
   StringRef FilenameForLookup;
   SmallString<256> PathBuf;
-  if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) {
+  if (llvm::sys::path::is_absolute(OriginalFilename)) {
     FilenameForLookup = OriginalFilename;
   } else if (!WorkingDirForCacheLookup) {
     return WorkingDirForCacheLookup.getError();
@@ -271,7 +271,7 @@ DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
     llvm::sys::path::append(PathBuf, RelFilename);
     FilenameForLookup = PathBuf.str();
   }
-  assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup));
+  assert(llvm::sys::path::is_absolute(FilenameForLookup));
   if (const auto *Entry =
           findEntryByFilenameWithWriteThrough(FilenameForLookup))
     return scanForDirectivesIfNecessary(*Entry, OriginalFilename,
@@ -371,11 +371,11 @@ void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() {
       getUnderlyingFS().getCurrentWorkingDirectory();
   if (!CWD) {
     WorkingDirForCacheLookup = CWD.getError();
-  } else if (!llvm::sys::path::is_absolute_gnu(*CWD)) {
+  } else if (!llvm::sys::path::is_absolute(*CWD)) {
     WorkingDirForCacheLookup = llvm::errc::invalid_argument;
   } else {
     WorkingDirForCacheLookup = *CWD;
   }
   assert(!WorkingDirForCacheLookup ||
-         llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup));
+         llvm::sys::path::is_absolute(*WorkingDirForCacheLookup));
 }



More information about the cfe-commits mailing list