[llvm] [llvm][vfs] Preserve paths for fallback/fallthrough in RedirectingFileSystem (PR #85307)

Ben Langmuir via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 14 13:46:06 PDT 2024


https://github.com/benlangmuir created https://github.com/llvm/llvm-project/pull/85307

When we lookup in the external filesystem, do not remove . and .. components from the original path. For .. this is a correctness issue in the presence of symlinks, while for . it is simply better practice to preserve the original path to better match the behaviour of other filesystems. The only modification we need is to apply the working directory, since it could differ from the external filesystem.

rdar://123655660

>From 2c331e6a8b27734b87adf92c290b854ca9a24f06 Mon Sep 17 00:00:00 2001
From: Ben Langmuir <blangmuir at apple.com>
Date: Thu, 14 Mar 2024 13:41:19 -0700
Subject: [PATCH] [llvm][vfs] Preserve paths for fallback/fallthrough in
 RedirectingFileSystem

When we lookup in the external filesystem, do not remove . and ..
components from the original path. For .. this is a correctness issue in
the presence of symlinks, while for . it is simply better practice to
preserve the original path to better match the behaviour of other
filesystems. The only modification we need is to apply the working
directory, since it could differ from the external filesystem.

rdar://123655660
---
 llvm/include/llvm/Support/VirtualFileSystem.h |  8 +-
 llvm/lib/Support/VirtualFileSystem.cpp        | 88 +++++++++----------
 .../Support/VirtualFileSystemTest.cpp         | 70 +++++++++++++++
 3 files changed, 117 insertions(+), 49 deletions(-)

diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h
index ef1fac92c2fa4a..770ca8764426a4 100644
--- a/llvm/include/llvm/Support/VirtualFileSystem.h
+++ b/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -929,12 +929,12 @@ class RedirectingFileSystem
   /// Canonicalize path by removing ".", "..", "./", components. This is
   /// a VFS request, do not bother about symlinks in the path components
   /// but canonicalize in order to perform the correct entry search.
-  std::error_code makeCanonical(SmallVectorImpl<char> &Path) const;
+  std::error_code makeCanonicalForLookup(SmallVectorImpl<char> &Path) const;
 
   /// Get the File status, or error, from the underlying external file system.
   /// This returns the status with the originally requested name, while looking
-  /// up the entry using the canonical path.
-  ErrorOr<Status> getExternalStatus(const Twine &CanonicalPath,
+  /// up the entry using a potentially different path.
+  ErrorOr<Status> getExternalStatus(const Twine &LookupPath,
                                     const Twine &OriginalPath) const;
 
   /// Make \a Path an absolute path.
@@ -1022,7 +1022,7 @@ class RedirectingFileSystem
                  llvm::SmallVectorImpl<Entry *> &Entries) const;
 
   /// Get the status for a path with the provided \c LookupResult.
-  ErrorOr<Status> status(const Twine &CanonicalPath, const Twine &OriginalPath,
+  ErrorOr<Status> status(const Twine &LookupPath, const Twine &OriginalPath,
                          const LookupResult &Result);
 
 public:
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index 051dd2a67d120f..057f8eae0552c6 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -1344,7 +1344,7 @@ std::error_code RedirectingFileSystem::isLocal(const Twine &Path_,
   SmallString<256> Path;
   Path_.toVector(Path);
 
-  if (makeCanonical(Path))
+  if (makeAbsolute(Path))
     return {};
 
   return ExternalFS->isLocal(Path, Result);
@@ -1411,7 +1411,7 @@ directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir,
   SmallString<256> Path;
   Dir.toVector(Path);
 
-  EC = makeCanonical(Path);
+  EC = makeAbsolute(Path);
   if (EC)
     return {};
 
@@ -2261,8 +2261,8 @@ void RedirectingFileSystem::LookupResult::getPath(
   llvm::sys::path::append(Result, E->getName());
 }
 
-std::error_code
-RedirectingFileSystem::makeCanonical(SmallVectorImpl<char> &Path) const {
+std::error_code RedirectingFileSystem::makeCanonicalForLookup(
+    SmallVectorImpl<char> &Path) const {
   if (std::error_code EC = makeAbsolute(Path))
     return EC;
 
@@ -2277,12 +2277,16 @@ RedirectingFileSystem::makeCanonical(SmallVectorImpl<char> &Path) const {
 
 ErrorOr<RedirectingFileSystem::LookupResult>
 RedirectingFileSystem::lookupPath(StringRef Path) const {
+  llvm::SmallString<128> CanonicalPath(Path);
+  if (std::error_code EC = makeCanonicalForLookup(CanonicalPath))
+    return EC;
+
   // RedirectOnly means the VFS is always used.
   if (UsageTrackingActive && Redirection == RedirectKind::RedirectOnly)
     HasBeenUsed = true;
 
-  sys::path::const_iterator Start = sys::path::begin(Path);
-  sys::path::const_iterator End = sys::path::end(Path);
+  sys::path::const_iterator Start = sys::path::begin(CanonicalPath);
+  sys::path::const_iterator End = sys::path::end(CanonicalPath);
   llvm::SmallVector<Entry *, 32> Entries;
   for (const auto &Root : Roots) {
     ErrorOr<RedirectingFileSystem::LookupResult> Result =
@@ -2358,14 +2362,14 @@ static Status getRedirectedFileStatus(const Twine &OriginalPath,
 }
 
 ErrorOr<Status> RedirectingFileSystem::status(
-    const Twine &CanonicalPath, const Twine &OriginalPath,
+    const Twine &LookupPath, const Twine &OriginalPath,
     const RedirectingFileSystem::LookupResult &Result) {
   if (std::optional<StringRef> ExtRedirect = Result.getExternalRedirect()) {
-    SmallString<256> CanonicalRemappedPath((*ExtRedirect).str());
-    if (std::error_code EC = makeCanonical(CanonicalRemappedPath))
+    SmallString<256> RemappedPath((*ExtRedirect).str());
+    if (std::error_code EC = makeAbsolute(RemappedPath))
       return EC;
 
-    ErrorOr<Status> S = ExternalFS->status(CanonicalRemappedPath);
+    ErrorOr<Status> S = ExternalFS->status(RemappedPath);
     if (!S)
       return S;
     S = Status::copyWithNewName(*S, *ExtRedirect);
@@ -2375,13 +2379,13 @@ ErrorOr<Status> RedirectingFileSystem::status(
   }
 
   auto *DE = cast<RedirectingFileSystem::DirectoryEntry>(Result.E);
-  return Status::copyWithNewName(DE->getStatus(), CanonicalPath);
+  return Status::copyWithNewName(DE->getStatus(), LookupPath);
 }
 
 ErrorOr<Status>
-RedirectingFileSystem::getExternalStatus(const Twine &CanonicalPath,
+RedirectingFileSystem::getExternalStatus(const Twine &LookupPath,
                                          const Twine &OriginalPath) const {
-  auto Result = ExternalFS->status(CanonicalPath);
+  auto Result = ExternalFS->status(LookupPath);
 
   // The path has been mapped by some nested VFS, don't override it with the
   // original path.
@@ -2391,38 +2395,37 @@ RedirectingFileSystem::getExternalStatus(const Twine &CanonicalPath,
 }
 
 ErrorOr<Status> RedirectingFileSystem::status(const Twine &OriginalPath) {
-  SmallString<256> CanonicalPath;
-  OriginalPath.toVector(CanonicalPath);
+  SmallString<256> Path;
+  OriginalPath.toVector(Path);
 
-  if (std::error_code EC = makeCanonical(CanonicalPath))
+  if (std::error_code EC = makeAbsolute(Path))
     return EC;
 
   if (Redirection == RedirectKind::Fallback) {
     // Attempt to find the original file first, only falling back to the
     // mapped file if that fails.
-    ErrorOr<Status> S = getExternalStatus(CanonicalPath, OriginalPath);
+    ErrorOr<Status> S = getExternalStatus(Path, OriginalPath);
     if (S)
       return S;
   }
 
-  ErrorOr<RedirectingFileSystem::LookupResult> Result =
-      lookupPath(CanonicalPath);
+  ErrorOr<RedirectingFileSystem::LookupResult> Result = lookupPath(Path);
   if (!Result) {
     // Was not able to map file, fallthrough to using the original path if
     // that was the specified redirection type.
     if (Redirection == RedirectKind::Fallthrough &&
         isFileNotFound(Result.getError()))
-      return getExternalStatus(CanonicalPath, OriginalPath);
+      return getExternalStatus(Path, OriginalPath);
     return Result.getError();
   }
 
-  ErrorOr<Status> S = status(CanonicalPath, OriginalPath, *Result);
+  ErrorOr<Status> S = status(Path, OriginalPath, *Result);
   if (!S && Redirection == RedirectKind::Fallthrough &&
       isFileNotFound(S.getError(), Result->E)) {
     // Mapped the file but it wasn't found in the underlying filesystem,
     // fallthrough to using the original path if that was the specified
     // redirection type.
-    return getExternalStatus(CanonicalPath, OriginalPath);
+    return getExternalStatus(Path, OriginalPath);
   }
 
   return S;
@@ -2471,30 +2474,27 @@ File::getWithPath(ErrorOr<std::unique_ptr<File>> Result, const Twine &P) {
 
 ErrorOr<std::unique_ptr<File>>
 RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) {
-  SmallString<256> CanonicalPath;
-  OriginalPath.toVector(CanonicalPath);
+  SmallString<256> Path;
+  OriginalPath.toVector(Path);
 
-  if (std::error_code EC = makeCanonical(CanonicalPath))
+  if (std::error_code EC = makeAbsolute(Path))
     return EC;
 
   if (Redirection == RedirectKind::Fallback) {
     // Attempt to find the original file first, only falling back to the
     // mapped file if that fails.
-    auto F = File::getWithPath(ExternalFS->openFileForRead(CanonicalPath),
-                               OriginalPath);
+    auto F = File::getWithPath(ExternalFS->openFileForRead(Path), OriginalPath);
     if (F)
       return F;
   }
 
-  ErrorOr<RedirectingFileSystem::LookupResult> Result =
-      lookupPath(CanonicalPath);
+  ErrorOr<RedirectingFileSystem::LookupResult> Result = lookupPath(Path);
   if (!Result) {
     // Was not able to map file, fallthrough to using the original path if
     // that was the specified redirection type.
     if (Redirection == RedirectKind::Fallthrough &&
         isFileNotFound(Result.getError()))
-      return File::getWithPath(ExternalFS->openFileForRead(CanonicalPath),
-                               OriginalPath);
+      return File::getWithPath(ExternalFS->openFileForRead(Path), OriginalPath);
     return Result.getError();
   }
 
@@ -2502,22 +2502,21 @@ RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) {
     return make_error_code(llvm::errc::invalid_argument);
 
   StringRef ExtRedirect = *Result->getExternalRedirect();
-  SmallString<256> CanonicalRemappedPath(ExtRedirect.str());
-  if (std::error_code EC = makeCanonical(CanonicalRemappedPath))
+  SmallString<256> RemappedPath(ExtRedirect.str());
+  if (std::error_code EC = makeAbsolute(RemappedPath))
     return EC;
 
   auto *RE = cast<RedirectingFileSystem::RemapEntry>(Result->E);
 
-  auto ExternalFile = File::getWithPath(
-      ExternalFS->openFileForRead(CanonicalRemappedPath), ExtRedirect);
+  auto ExternalFile =
+      File::getWithPath(ExternalFS->openFileForRead(RemappedPath), ExtRedirect);
   if (!ExternalFile) {
     if (Redirection == RedirectKind::Fallthrough &&
         isFileNotFound(ExternalFile.getError(), Result->E)) {
       // Mapped the file but it wasn't found in the underlying filesystem,
       // fallthrough to using the original path if that was the specified
       // redirection type.
-      return File::getWithPath(ExternalFS->openFileForRead(CanonicalPath),
-                               OriginalPath);
+      return File::getWithPath(ExternalFS->openFileForRead(Path), OriginalPath);
     }
     return ExternalFile;
   }
@@ -2537,28 +2536,27 @@ RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) {
 std::error_code
 RedirectingFileSystem::getRealPath(const Twine &OriginalPath,
                                    SmallVectorImpl<char> &Output) const {
-  SmallString<256> CanonicalPath;
-  OriginalPath.toVector(CanonicalPath);
+  SmallString<256> Path;
+  OriginalPath.toVector(Path);
 
-  if (std::error_code EC = makeCanonical(CanonicalPath))
+  if (std::error_code EC = makeAbsolute(Path))
     return EC;
 
   if (Redirection == RedirectKind::Fallback) {
     // Attempt to find the original file first, only falling back to the
     // mapped file if that fails.
-    std::error_code EC = ExternalFS->getRealPath(CanonicalPath, Output);
+    std::error_code EC = ExternalFS->getRealPath(Path, Output);
     if (!EC)
       return EC;
   }
 
-  ErrorOr<RedirectingFileSystem::LookupResult> Result =
-      lookupPath(CanonicalPath);
+  ErrorOr<RedirectingFileSystem::LookupResult> Result = lookupPath(Path);
   if (!Result) {
     // Was not able to map file, fallthrough to using the original path if
     // that was the specified redirection type.
     if (Redirection == RedirectKind::Fallthrough &&
         isFileNotFound(Result.getError()))
-      return ExternalFS->getRealPath(CanonicalPath, Output);
+      return ExternalFS->getRealPath(Path, Output);
     return Result.getError();
   }
 
@@ -2571,7 +2569,7 @@ RedirectingFileSystem::getRealPath(const Twine &OriginalPath,
       // Mapped the file but it wasn't found in the underlying filesystem,
       // fallthrough to using the original path if that was the specified
       // redirection type.
-      return ExternalFS->getRealPath(CanonicalPath, Output);
+      return ExternalFS->getRealPath(Path, Output);
     }
     return P;
   }
diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp
index d4abbb4345873c..bf5c53602e0712 100644
--- a/llvm/unittests/Support/VirtualFileSystemTest.cpp
+++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp
@@ -7,9 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Errc.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SourceMgr.h"
@@ -3330,3 +3332,71 @@ TEST(RedirectingFileSystemTest, Used) {
   EXPECT_TRUE(Redirecting1->hasBeenUsed());
   EXPECT_FALSE(Redirecting2->hasBeenUsed());
 }
+
+// Check that paths looked up in the external filesystem are unmodified, except
+// potentially to add the working directory. We cannot canonicalize away ..
+// in the presence of symlinks in the external filesystem.
+TEST(RedirectingFileSystemTest, ExternalPaths) {
+  struct InterceptorFS : llvm::vfs::ProxyFileSystem {
+    std::vector<std::string> SeenPaths;
+
+    InterceptorFS(IntrusiveRefCntPtr<FileSystem> UnderlyingFS)
+        : ProxyFileSystem(UnderlyingFS) {}
+
+    llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override {
+      SeenPaths.push_back(Path.str());
+      return ProxyFileSystem::status(Path);
+    }
+
+    llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
+    openFileForRead(const Twine &Path) override {
+      SeenPaths.push_back(Path.str());
+      return ProxyFileSystem::openFileForRead(Path);
+    }
+
+    std::error_code isLocal(const Twine &Path, bool &Result) override {
+      SeenPaths.push_back(Path.str());
+      return ProxyFileSystem::isLocal(Path, Result);
+    }
+
+    vfs::directory_iterator dir_begin(const Twine &Dir,
+                                      std::error_code &EC) override {
+      SeenPaths.push_back(Dir.str());
+      return ProxyFileSystem::dir_begin(Dir, EC);
+    }
+  };
+
+  std::error_code EC;
+  auto BaseFS = makeIntrusiveRefCnt<DummyFileSystem>();
+  BaseFS->setCurrentWorkingDirectory("/cwd");
+  auto CheckFS = makeIntrusiveRefCnt<InterceptorFS>(BaseFS);
+  auto FS = vfs::RedirectingFileSystem::create({}, /*UseExternalNames=*/false,
+                                               *CheckFS);
+
+  FS->status("/a/../b");
+  FS->openFileForRead("c");
+  FS->exists("./d");
+  bool IsLocal = false;
+  FS->isLocal("/e/./../f", IsLocal);
+  FS->dir_begin(".././g", EC);
+
+  std::vector<std::string> Expected{
+    "/a/../b",
+    "/cwd/c",
+    "/cwd/./d",
+    "/e/./../f",
+    "/cwd/.././g"
+  };
+
+  EXPECT_EQ(CheckFS->SeenPaths, Expected);
+
+  CheckFS->SeenPaths.clear();
+  FS->setRedirection(vfs::RedirectingFileSystem::RedirectKind::Fallback);
+  FS->status("/a/../b");
+  FS->openFileForRead("c");
+  FS->exists("./d");
+  FS->isLocal("/e/./../f", IsLocal);
+  FS->dir_begin(".././g", EC);
+
+  EXPECT_EQ(CheckFS->SeenPaths, Expected);
+}



More information about the llvm-commits mailing list