[llvm] a44c645 - [llvm][vfs] Implement in-memory symlinks

Jan Svoboda via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 21 07:30:07 PDT 2022


Author: Jan Svoboda
Date: 2022-06-21T16:29:54+02:00
New Revision: a44c6453fe3844de0efe8f490bb7a27c6f188dfd

URL: https://github.com/llvm/llvm-project/commit/a44c6453fe3844de0efe8f490bb7a27c6f188dfd
DIFF: https://github.com/llvm/llvm-project/commit/a44c6453fe3844de0efe8f490bb7a27c6f188dfd.diff

LOG: [llvm][vfs] Implement in-memory symlinks

This patch implements symlinks for the in-memory VFS. Original author: @erik.pilkington.

Depends on D117648 & D117649.

Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D117650

Added: 
    

Modified: 
    llvm/include/llvm/Support/VirtualFileSystem.h
    llvm/lib/Support/VirtualFileSystem.cpp
    llvm/unittests/Support/VirtualFileSystemTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h
index d310305b14e18..3c99b0d8efdb4 100644
--- a/llvm/include/llvm/Support/VirtualFileSystem.h
+++ b/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -22,6 +22,7 @@
 #include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/Support/Chrono.h"
 #include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/Errc.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SourceMgr.h"
@@ -478,6 +479,24 @@ struct NewInMemoryNodeInfo {
   Status makeStatus() const;
 };
 
+class NamedNodeOrError {
+  ErrorOr<std::pair<llvm::SmallString<128>, const detail::InMemoryNode *>>
+      Value;
+
+public:
+  NamedNodeOrError(llvm::SmallString<128> Name,
+                   const detail::InMemoryNode *Node)
+      : Value(std::make_pair(Name, Node)) {}
+  NamedNodeOrError(std::error_code EC) : Value(EC) {}
+  NamedNodeOrError(llvm::errc EC) : Value(EC) {}
+
+  StringRef getName() const { return (*Value).first; }
+  explicit operator bool() const { return static_cast<bool>(Value); }
+  operator std::error_code() const { return Value.getError(); }
+  std::error_code getError() const { return Value.getError(); }
+  const detail::InMemoryNode *operator*() const { return (*Value).second; }
+};
+
 } // namespace detail
 
 /// An in-memory file system.
@@ -496,7 +515,11 @@ class InMemoryFileSystem : public FileSystem {
                Optional<llvm::sys::fs::file_type> Type,
                Optional<llvm::sys::fs::perms> Perms, MakeNodeFn MakeNode);
 
-  ErrorOr<const detail::InMemoryNode *> lookupNode(const Twine &P) const;
+  /// Looks up the in-memory node for the path \param P.
+  /// If \param FollowFinalSymlink is true, the returned node is guaranteed to
+  /// not be a symlink and its path may differ from \param P.
+  detail::NamedNodeOrError lookupNode(const Twine &P, bool FollowFinalSymlink,
+                                      size_t SymlinkDepth = 0) const;
 
   class DirIterator;
 
@@ -532,6 +555,18 @@ class InMemoryFileSystem : public FileSystem {
   /// successfully created, false otherwise.
   bool addHardLink(const Twine &NewLink, const Twine &Target);
 
+  /// Arbitrary max depth to search through symlinks. We can get into problems
+  /// if a link links to a link that links back to the link, for example.
+  static constexpr size_t MaxSymlinkDepth = 16;
+
+  /// Add a symbolic link. Unlike a HardLink, \param Target doesn't need
+  /// to refer to a file (or refer to anything, as it happens). Also, an
+  /// in-memory directory for \param Target isn't automatically created.
+  bool addSymbolicLink(const Twine &NewLink, const Twine &Target,
+                       time_t ModificationTime, Optional<uint32_t> User = None,
+                       Optional<uint32_t> Group = None,
+                       Optional<llvm::sys::fs::perms> Perms = None);
+
   /// Add a buffer to the VFS with a path. The VFS does not own the buffer.
   /// If present, User, Group, Type and Perms apply to the newly-created file
   /// or directory.

diff  --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index 2f457e15d5779..9c6a0c071755d 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -590,10 +590,15 @@ namespace vfs {
 
 namespace detail {
 
-enum InMemoryNodeKind { IME_File, IME_Directory, IME_HardLink };
+enum InMemoryNodeKind {
+  IME_File,
+  IME_Directory,
+  IME_HardLink,
+  IME_SymbolicLink,
+};
 
 /// The in memory file system is a tree of Nodes. Every node can either be a
-/// file , hardlink or a directory.
+/// file, symlink, hardlink or a directory.
 class InMemoryNode {
   InMemoryNodeKind Kind;
   std::string FileName;
@@ -662,6 +667,30 @@ class InMemoryHardLink : public InMemoryNode {
   }
 };
 
+class InMemorySymbolicLink : public InMemoryNode {
+  std::string TargetPath;
+  Status Stat;
+
+public:
+  InMemorySymbolicLink(StringRef Path, StringRef TargetPath, Status Stat)
+      : InMemoryNode(Path, IME_SymbolicLink), TargetPath(std::move(TargetPath)),
+        Stat(Stat) {}
+
+  std::string toString(unsigned Indent) const override {
+    return std::string(Indent, ' ') + "SymbolicLink to -> " + TargetPath;
+  }
+
+  Status getStatus(const Twine &RequestedName) const override {
+    return Status::copyWithNewName(Stat, RequestedName);
+  }
+
+  StringRef getTargetPath() const { return TargetPath; }
+
+  static bool classof(const InMemoryNode *N) {
+    return N->getKind() == IME_SymbolicLink;
+  }
+};
+
 /// Adapt a InMemoryFile for VFS' File interface.  The goal is to make
 /// \p InMemoryFileAdaptor mimic as much as possible the behavior of
 /// \p RealFile.
@@ -897,8 +926,9 @@ bool InMemoryFileSystem::addFileNoOwn(const Twine &P, time_t ModificationTime,
                  });
 }
 
-ErrorOr<const detail::InMemoryNode *>
-InMemoryFileSystem::lookupNode(const Twine &P) const {
+detail::NamedNodeOrError
+InMemoryFileSystem::lookupNode(const Twine &P, bool FollowFinalSymlink,
+                               size_t SymlinkDepth) const {
   SmallString<128> Path;
   P.toVector(Path);
 
@@ -912,7 +942,7 @@ InMemoryFileSystem::lookupNode(const Twine &P) const {
 
   const detail::InMemoryDirectory *Dir = Root.get();
   if (Path.empty())
-    return Dir;
+    return detail::NamedNodeOrError(Path, Dir);
 
   auto I = llvm::sys::path::begin(Path), E = llvm::sys::path::end(Path);
   while (true) {
@@ -921,30 +951,63 @@ InMemoryFileSystem::lookupNode(const Twine &P) const {
     if (!Node)
       return errc::no_such_file_or_directory;
 
+    if (auto Symlink = dyn_cast<detail::InMemorySymbolicLink>(Node)) {
+      // If we're at the end of the path, and we're not following through
+      // terminal symlinks, then we're done.
+      if (I == E && !FollowFinalSymlink)
+        return detail::NamedNodeOrError(Path, Symlink);
+
+      if (SymlinkDepth > InMemoryFileSystem::MaxSymlinkDepth)
+        return errc::no_such_file_or_directory;
+
+      SmallString<128> TargetPath = Symlink->getTargetPath();
+      if (std::error_code EC = makeAbsolute(TargetPath))
+        return EC;
+
+      // Keep going with the target. We always want to follow symlinks here
+      // because we're either at the end of a path that we want to follow, or
+      // not at the end of a path, in which case we need to follow the symlink
+      // regardless.
+      auto Target =
+          lookupNode(TargetPath, /*FollowFinalSymlink=*/true, SymlinkDepth + 1);
+      if (!Target || I == E)
+        return Target;
+
+      if (!isa<detail::InMemoryDirectory>(*Target))
+        return errc::no_such_file_or_directory;
+
+      // Otherwise, continue on the search in the symlinked directory.
+      Dir = cast<detail::InMemoryDirectory>(*Target);
+      continue;
+    }
+
     // Return the file if it's at the end of the path.
     if (auto File = dyn_cast<detail::InMemoryFile>(Node)) {
       if (I == E)
-        return File;
+        return detail::NamedNodeOrError(Path, File);
       return errc::no_such_file_or_directory;
     }
 
     // If Node is HardLink then return the resolved file.
     if (auto File = dyn_cast<detail::InMemoryHardLink>(Node)) {
       if (I == E)
-        return &File->getResolvedFile();
+        return detail::NamedNodeOrError(Path, &File->getResolvedFile());
       return errc::no_such_file_or_directory;
     }
     // Traverse directories.
     Dir = cast<detail::InMemoryDirectory>(Node);
     if (I == E)
-      return Dir;
+      return detail::NamedNodeOrError(Path, Dir);
   }
 }
 
 bool InMemoryFileSystem::addHardLink(const Twine &NewLink,
                                      const Twine &Target) {
-  auto NewLinkNode = lookupNode(NewLink);
-  auto TargetNode = lookupNode(Target);
+  auto NewLinkNode = lookupNode(NewLink, /*FollowFinalSymlink=*/false);
+  // Whether symlinks in the hardlink target are followed is
+  // implementation-defined in POSIX.
+  // We're following symlinks here to be consistent with macOS.
+  auto TargetNode = lookupNode(Target, /*FollowFinalSymlink=*/true);
   // FromPath must not have been added before. ToPath must have been added
   // before. Resolved ToPath must be a File.
   if (!TargetNode || NewLinkNode || !isa<detail::InMemoryFile>(*TargetNode))
@@ -957,8 +1020,30 @@ bool InMemoryFileSystem::addHardLink(const Twine &NewLink,
                  });
 }
 
+bool InMemoryFileSystem::addSymbolicLink(const Twine &NewLink,
+                                         const Twine &Target,
+                                         time_t ModificationTime,
+                                         Optional<uint32_t> User,
+                                         Optional<uint32_t> Group,
+                                         Optional<llvm::sys::fs::perms> Perms) {
+  auto NewLinkNode = lookupNode(NewLink, /*FollowFinalSymlink=*/false);
+  if (NewLinkNode)
+    return false;
+
+  SmallString<128> NewLinkStr, TargetStr;
+  NewLink.toVector(NewLinkStr);
+  Target.toVector(TargetStr);
+
+  return addFile(NewLinkStr, ModificationTime, nullptr, User, Group,
+                 sys::fs::file_type::symlink_file, Perms,
+                 [&](detail::NewInMemoryNodeInfo NNI) {
+                   return std::make_unique<detail::InMemorySymbolicLink>(
+                       NewLinkStr, TargetStr, NNI.makeStatus());
+                 });
+}
+
 llvm::ErrorOr<Status> InMemoryFileSystem::status(const Twine &Path) {
-  auto Node = lookupNode(Path);
+  auto Node = lookupNode(Path, /*FollowFinalSymlink=*/true);
   if (Node)
     return (*Node)->getStatus(Path);
   return Node.getError();
@@ -966,7 +1051,7 @@ llvm::ErrorOr<Status> InMemoryFileSystem::status(const Twine &Path) {
 
 llvm::ErrorOr<std::unique_ptr<File>>
 InMemoryFileSystem::openFileForRead(const Twine &Path) {
-  auto Node = lookupNode(Path);
+  auto Node = lookupNode(Path,/*FollowFinalSymlink=*/true);
   if (!Node)
     return Node.getError();
 
@@ -982,6 +1067,7 @@ InMemoryFileSystem::openFileForRead(const Twine &Path) {
 
 /// Adaptor from InMemoryDir::iterator to directory_iterator.
 class InMemoryFileSystem::DirIterator : public llvm::vfs::detail::DirIterImpl {
+  const InMemoryFileSystem *FS;
   detail::InMemoryDirectory::const_iterator I;
   detail::InMemoryDirectory::const_iterator E;
   std::string RequestedDirName;
@@ -999,6 +1085,13 @@ class InMemoryFileSystem::DirIterator : public llvm::vfs::detail::DirIterImpl {
       case detail::IME_Directory:
         Type = sys::fs::file_type::directory_file;
         break;
+      case detail::IME_SymbolicLink:
+        if (auto SymlinkTarget =
+                FS->lookupNode(Path, /*FollowFinalSymlink=*/true)) {
+          Path = SymlinkTarget.getName();
+          Type = (*SymlinkTarget)->getStatus(Path).getType();
+        }
+        break;
       }
       CurrentEntry = directory_entry(std::string(Path.str()), Type);
     } else {
@@ -1011,9 +1104,10 @@ class InMemoryFileSystem::DirIterator : public llvm::vfs::detail::DirIterImpl {
 public:
   DirIterator() = default;
 
-  explicit DirIterator(const detail::InMemoryDirectory &Dir,
-                       std::string RequestedDirName)
-      : I(Dir.begin()), E(Dir.end()),
+  DirIterator(const InMemoryFileSystem *FS,
+              const detail::InMemoryDirectory &Dir,
+              std::string RequestedDirName)
+      : FS(FS), I(Dir.begin()), E(Dir.end()),
         RequestedDirName(std::move(RequestedDirName)) {
     setCurrentEntry();
   }
@@ -1027,7 +1121,7 @@ class InMemoryFileSystem::DirIterator : public llvm::vfs::detail::DirIterImpl {
 
 directory_iterator InMemoryFileSystem::dir_begin(const Twine &Dir,
                                                  std::error_code &EC) {
-  auto Node = lookupNode(Dir);
+  auto Node = lookupNode(Dir, /*FollowFinalSymlink=*/true);
   if (!Node) {
     EC = Node.getError();
     return directory_iterator(std::make_shared<DirIterator>());
@@ -1035,7 +1129,7 @@ directory_iterator InMemoryFileSystem::dir_begin(const Twine &Dir,
 
   if (auto *DirNode = dyn_cast<detail::InMemoryDirectory>(*Node))
     return directory_iterator(
-        std::make_shared<DirIterator>(*DirNode, Dir.str()));
+        std::make_shared<DirIterator>(this, *DirNode, Dir.str()));
 
   EC = make_error_code(llvm::errc::not_a_directory);
   return directory_iterator(std::make_shared<DirIterator>());

diff  --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp
index e32b3d2ef108e..1e300eec711d1 100644
--- a/llvm/unittests/Support/VirtualFileSystemTest.cpp
+++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp
@@ -1301,6 +1301,13 @@ TEST_F(InMemoryFileSystemTest, AddHardLinkToADirectory) {
   EXPECT_FALSE(FS.addHardLink(Link, Dir));
 }
 
+TEST_F(InMemoryFileSystemTest, AddHardLinkToASymlink) {
+  EXPECT_TRUE(FS.addFile("/file", 0, MemoryBuffer::getMemBuffer("content")));
+  EXPECT_TRUE(FS.addSymbolicLink("/symlink", "/file", 0));
+  EXPECT_TRUE(FS.addHardLink("/hardlink", "/symlink"));
+  EXPECT_EQ((*FS.getBufferForFile("/hardlink"))->getBuffer(), "content");
+}
+
 TEST_F(InMemoryFileSystemTest, AddHardLinkFromADirectory) {
   StringRef Dir = "path/to/dummy/dir";
   StringRef Target = "path/to/dummy/dir/target";
@@ -1351,6 +1358,85 @@ TEST_F(InMemoryFileSystemTest, UniqueID) {
   EXPECT_EQ(FS.status("/a")->getUniqueID(), FS2.status("/a")->getUniqueID());
 }
 
+TEST_F(InMemoryFileSystemTest, AddSymlinkToAFile) {
+  EXPECT_TRUE(
+      FS.addFile("/some/file", 0, MemoryBuffer::getMemBuffer("contents")));
+  EXPECT_TRUE(FS.addSymbolicLink("/other/file/link", "/some/file", 0));
+  ErrorOr<vfs::Status> Stat = FS.status("/some/file");
+  EXPECT_TRUE(Stat->isRegularFile());
+}
+
+TEST_F(InMemoryFileSystemTest, AddSymlinkToADirectory) {
+  EXPECT_TRUE(FS.addSymbolicLink("/link", "/target", 0));
+  EXPECT_TRUE(
+      FS.addFile("/target/foo.h", 0, MemoryBuffer::getMemBuffer("foo")));
+  ErrorOr<vfs::Status> Stat = FS.status("/link/foo.h");
+  EXPECT_TRUE(Stat);
+  EXPECT_EQ((*Stat).getName(), "/link/foo.h");
+  EXPECT_TRUE(Stat->isRegularFile());
+}
+
+TEST_F(InMemoryFileSystemTest, AddSymlinkToASymlink) {
+  EXPECT_TRUE(FS.addSymbolicLink("/first", "/second", 0));
+  EXPECT_TRUE(FS.addSymbolicLink("/second", "/third", 0));
+  EXPECT_TRUE(FS.addFile("/third", 0, MemoryBuffer::getMemBuffer("")));
+  ErrorOr<vfs::Status> Stat = FS.status("/first");
+  EXPECT_TRUE(Stat);
+  EXPECT_EQ((*Stat).getName(), "/first");
+  // Follow-through symlinks by default. This matches RealFileSystem's
+  // semantics.
+  EXPECT_TRUE(Stat->isRegularFile());
+  Stat = FS.status("/second");
+  EXPECT_TRUE(Stat);
+  EXPECT_EQ((*Stat).getName(), "/second");
+  EXPECT_TRUE(Stat->isRegularFile());
+  Stat = FS.status("/third");
+  EXPECT_TRUE(Stat);
+  EXPECT_EQ((*Stat).getName(), "/third");
+  EXPECT_TRUE(Stat->isRegularFile());
+}
+
+TEST_F(InMemoryFileSystemTest, AddRecursiveSymlink) {
+  EXPECT_TRUE(FS.addSymbolicLink("/link-a", "/link-b", 0));
+  EXPECT_TRUE(FS.addSymbolicLink("/link-b", "/link-a", 0));
+  ErrorOr<vfs::Status> Stat = FS.status("/link-a/foo");
+  EXPECT_FALSE(Stat);
+  EXPECT_EQ(Stat.getError(), errc::no_such_file_or_directory);
+}
+
+TEST_F(InMemoryFileSystemTest, DirectoryIteratorWithSymlinkToAFile) {
+  std::error_code EC;
+
+  EXPECT_TRUE(FS.addFile("/file", 0, MemoryBuffer::getMemBuffer("")));
+  EXPECT_TRUE(FS.addSymbolicLink("/symlink", "/file", 0));
+
+  vfs::directory_iterator I = FS.dir_begin("/", EC), E;
+  ASSERT_FALSE(EC);
+
+  std::vector<std::string> Nodes;
+  for (; !EC && I != E; I.increment(EC))
+    Nodes.push_back(getPosixPath(std::string(I->path())));
+
+  EXPECT_THAT(Nodes, testing::UnorderedElementsAre("/file", "/file"));
+}
+
+TEST_F(InMemoryFileSystemTest, RecursiveDirectoryIteratorWithSymlinkToADir) {
+  std::error_code EC;
+
+  EXPECT_TRUE(FS.addFile("/dir/file", 0, MemoryBuffer::getMemBuffer("")));
+  EXPECT_TRUE(FS.addSymbolicLink("/dir_symlink", "/dir", 0));
+
+  vfs::recursive_directory_iterator I(FS, "/", EC), E;
+  ASSERT_FALSE(EC);
+
+  std::vector<std::string> Nodes;
+  for (; !EC && I != E; I.increment(EC))
+    Nodes.push_back(getPosixPath(std::string(I->path())));
+
+  EXPECT_THAT(Nodes, testing::UnorderedElementsAre("/dir", "/dir/file", "/dir",
+                                                   "/dir/file"));
+}
+
 // NOTE: in the tests below, we use '//root/' as our root directory, since it is
 // a legal *absolute* path on Windows as well as *nix.
 class VFSFromYAMLTest : public ::testing::Test {


        


More information about the llvm-commits mailing list