[llvm] 22555ba - [VFS] InMemoryFilesystem's UniqueIDs are a function of path and content.

Sam McCall via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 29 14:24:26 PDT 2021


Author: Sam McCall
Date: 2021-09-29T23:24:18+02:00
New Revision: 22555bafe90dcb2e3d9d59e41e993edc0e4689fc

URL: https://github.com/llvm/llvm-project/commit/22555bafe90dcb2e3d9d59e41e993edc0e4689fc
DIFF: https://github.com/llvm/llvm-project/commit/22555bafe90dcb2e3d9d59e41e993edc0e4689fc.diff

LOG: [VFS] InMemoryFilesystem's UniqueIDs are a function of path and content.

This ensures that re-creating "the same" FS results in the same UIDs for files.
In turn, this means that creating a clang module (preamble) using one in-memory
filesystem and consuming it using another doesn't create duplicate FileEntrys
for files that are the same in both FSes.

It's tempting to give the creator control over the UIDs instead. However that
requires fiddly API changes, e.g. what should the UIDs of intermediate
directories be?
This change is more "magic" but seems safe given:
 - InMemoryFilesystem is used in testing more than production
 - comparing UIDs across filesystems is unusual
 - files with the same path and content are usually logically equivalent

(The usual reason for re-creating virtual filesystems rather than reusing them
is that typical use involves mutating their CWD and so is not threadsafe).

Differential Revision: https://reviews.llvm.org/D110711

Added: 
    

Modified: 
    llvm/lib/Support/VirtualFileSystem.cpp
    llvm/unittests/Support/VirtualFileSystemTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index ac54ff8ce1682..433b071832468 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileSystem/UniqueID.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
@@ -655,6 +656,9 @@ class InMemoryDirectory : public InMemoryNode {
   Status getStatus(const Twine &RequestedName) const {
     return Status::copyWithNewName(Stat, RequestedName);
   }
+
+  UniqueID getUniqueID() const { return Stat.getUniqueID(); }
+
   InMemoryNode *getChild(StringRef Name) {
     auto I = Entries.find(Name);
     if (I != Entries.end())
@@ -698,10 +702,28 @@ Status getNodeStatus(const InMemoryNode *Node, const Twine &RequestedName) {
 } // namespace
 } // namespace detail
 
+// The UniqueID of in-memory files is derived from path and content.
+// This avoids 
diff iculties in creating exactly equivalent in-memory FSes,
+// as often needed in multithreaded programs.
+static sys::fs::UniqueID getUniqueID(hash_code Hash) {
+  return sys::fs::UniqueID(std::numeric_limits<uint64_t>::max(),
+                           uint64_t(size_t(Hash)));
+}
+static sys::fs::UniqueID getFileID(sys::fs::UniqueID Parent,
+                                   llvm::StringRef Name,
+                                   llvm::StringRef Contents) {
+  return getUniqueID(llvm::hash_combine(Parent.getFile(), Name, Contents));
+}
+static sys::fs::UniqueID getDirectoryID(sys::fs::UniqueID Parent,
+                                        llvm::StringRef Name) {
+  return getUniqueID(llvm::hash_combine(Parent.getFile(), Name));
+}
+
 InMemoryFileSystem::InMemoryFileSystem(bool UseNormalizedPaths)
     : Root(new detail::InMemoryDirectory(
-          Status("", getNextVirtualUniqueID(), llvm::sys::TimePoint<>(), 0, 0,
-                 0, llvm::sys::fs::file_type::directory_file,
+          Status("", getDirectoryID(llvm::sys::fs::UniqueID(), ""),
+                 llvm::sys::TimePoint<>(), 0, 0, 0,
+                 llvm::sys::fs::file_type::directory_file,
                  llvm::sys::fs::perms::all_all))),
       UseNormalizedPaths(UseNormalizedPaths) {}
 
@@ -754,10 +776,14 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
           Child.reset(new detail::InMemoryHardLink(P.str(), *HardLinkTarget));
         else {
           // Create a new file or directory.
-          Status Stat(P.str(), getNextVirtualUniqueID(),
-                      llvm::sys::toTimePoint(ModificationTime), ResolvedUser,
-                      ResolvedGroup, Buffer->getBufferSize(), ResolvedType,
-                      ResolvedPerms);
+          Status Stat(
+              P.str(),
+              (ResolvedType == sys::fs::file_type::directory_file)
+                  ? getDirectoryID(Dir->getUniqueID(), Name)
+                  : getFileID(Dir->getUniqueID(), Name, Buffer->getBuffer()),
+              llvm::sys::toTimePoint(ModificationTime), ResolvedUser,
+              ResolvedGroup, Buffer->getBufferSize(), ResolvedType,
+              ResolvedPerms);
           if (ResolvedType == sys::fs::file_type::directory_file) {
             Child.reset(new detail::InMemoryDirectory(std::move(Stat)));
           } else {
@@ -772,9 +798,9 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
       // Create a new directory. Use the path up to here.
       Status Stat(
           StringRef(Path.str().begin(), Name.end() - Path.str().begin()),
-          getNextVirtualUniqueID(), llvm::sys::toTimePoint(ModificationTime),
-          ResolvedUser, ResolvedGroup, 0, sys::fs::file_type::directory_file,
-          NewDirectoryPerms);
+          getDirectoryID(Dir->getUniqueID(), Name),
+          llvm::sys::toTimePoint(ModificationTime), ResolvedUser, ResolvedGroup,
+          0, sys::fs::file_type::directory_file, NewDirectoryPerms);
       Dir = cast<detail::InMemoryDirectory>(Dir->addChild(
           Name, std::make_unique<detail::InMemoryDirectory>(std::move(Stat))));
       continue;

diff  --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp
index 6aa2189374857..ca35b5e9f8baa 100644
--- a/llvm/unittests/Support/VirtualFileSystemTest.cpp
+++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp
@@ -1279,6 +1279,26 @@ TEST_F(InMemoryFileSystemTest, RecursiveIterationWithHardLink) {
   EXPECT_THAT(Nodes, testing::UnorderedElementsAre("/a", "/a/b", "/c", "/c/d"));
 }
 
+TEST_F(InMemoryFileSystemTest, UniqueID) {
+  ASSERT_TRUE(FS.addFile("/a/b", 0, MemoryBuffer::getMemBuffer("text")));
+  ASSERT_TRUE(FS.addFile("/c/d", 0, MemoryBuffer::getMemBuffer("text")));
+  ASSERT_TRUE(FS.addHardLink("/e/f", "/a/b"));
+
+  EXPECT_EQ(FS.status("/a/b")->getUniqueID(), FS.status("/a/b")->getUniqueID());
+  EXPECT_NE(FS.status("/a/b")->getUniqueID(), FS.status("/c/d")->getUniqueID());
+  EXPECT_EQ(FS.status("/a/b")->getUniqueID(), FS.status("/e/f")->getUniqueID());
+  EXPECT_EQ(FS.status("/a")->getUniqueID(), FS.status("/a")->getUniqueID());
+  EXPECT_NE(FS.status("/a")->getUniqueID(), FS.status("/c")->getUniqueID());
+  EXPECT_NE(FS.status("/a")->getUniqueID(), FS.status("/e")->getUniqueID());
+
+  // Recreating the "same" FS yields the same UniqueIDs.
+  vfs::InMemoryFileSystem FS2;
+  ASSERT_TRUE(FS2.addFile("/a/b", 0, MemoryBuffer::getMemBuffer("text")));
+  EXPECT_EQ(FS.status("/a/b")->getUniqueID(),
+            FS2.status("/a/b")->getUniqueID());
+  EXPECT_EQ(FS.status("/a")->getUniqueID(), FS2.status("/a")->getUniqueID());
+}
+
 // NOTE: in the tests below, we use '//root/' as our root directory, since it is
 // a legal *absolute* path on Windows as well as *nix.
 class VFSFromYAMLTest : public ::testing::Test {


        


More information about the llvm-commits mailing list