[clang] 917acac - FileManager: Shrink FileEntryRef to the size of a pointer

Duncan P. N. Exon Smith via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 27 11:56:03 PDT 2020


Author: Duncan P. N. Exon Smith
Date: 2020-10-27T14:55:41-04:00
New Revision: 917acac960d40280ea02ea453e594034b1be1f6b

URL: https://github.com/llvm/llvm-project/commit/917acac960d40280ea02ea453e594034b1be1f6b
DIFF: https://github.com/llvm/llvm-project/commit/917acac960d40280ea02ea453e594034b1be1f6b.diff

LOG: FileManager: Shrink FileEntryRef to the size of a pointer

Shrink `FileEntryRef` to the size of a pointer, by having it directly
reference the `StringMapEntry` the same way that `DirectoryEntryRef`
does. This makes `FileEntryRef::FileEntryRef` private as a side effect
(`FileManager` is a friend!).

There are two helper types added within `FileEntryRef`:

- `FileEntryRef::MapValue` is the type stored in
  `FileManager::SeenFileEntries`. It's a replacement for
  `SeenFileEntryOrRedirect`, where the second pointer type has been
  changed from `StringRef*` to `MapEntry*` (see next bullet).
- `FileEntryRef::MapEntry` is the instantiation of `StringMapEntry<>`
  where `MapValue` is stored. This is what `FileEntryRef` has a pointer
  to, in order to grab the name in addition to the value.

Differential Revision: https://reviews.llvm.org/D89488

Added: 
    

Modified: 
    clang/include/clang/Basic/FileManager.h
    clang/lib/Basic/FileManager.cpp
    clang/lib/Basic/SourceManager.cpp
    clang/unittests/Basic/FileManagerTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h
index 8fcea2d5cbf3..d27b4260cca7 100644
--- a/clang/include/clang/Basic/FileManager.h
+++ b/clang/include/clang/Basic/FileManager.h
@@ -77,12 +77,10 @@ class FileEntry;
 /// accessed by the FileManager's client.
 class FileEntryRef {
 public:
-  FileEntryRef() = delete;
-  FileEntryRef(StringRef Name, const FileEntry &Entry)
-      : Name(Name), Entry(&Entry) {}
-
-  const StringRef getName() const { return Name; }
-  const FileEntry &getFileEntry() const { return *Entry; }
+  const StringRef getName() const { return Entry->first(); }
+  const FileEntry &getFileEntry() const {
+    return *Entry->second->V.get<FileEntry *>();
+  }
 
   inline bool isValid() const;
   inline off_t getSize() const;
@@ -91,15 +89,43 @@ class FileEntryRef {
   inline time_t getModificationTime() const;
 
   friend bool operator==(const FileEntryRef &LHS, const FileEntryRef &RHS) {
-    return LHS.Entry == RHS.Entry && LHS.Name == RHS.Name;
+    return LHS.Entry == RHS.Entry;
   }
   friend bool operator!=(const FileEntryRef &LHS, const FileEntryRef &RHS) {
     return !(LHS == RHS);
   }
 
+  struct MapValue;
+
+  /// Type used in the StringMap.
+  using MapEntry = llvm::StringMapEntry<llvm::ErrorOr<MapValue>>;
+
+  /// Type stored in the StringMap.
+  struct MapValue {
+    /// The pointer at another MapEntry is used when the FileManager should
+    /// silently forward from one name to another, which occurs in Redirecting
+    /// VFSs that use external names. In that case, the \c FileEntryRef
+    /// returned by the \c FileManager will have the external name, and not the
+    /// name that was used to lookup the file.
+    llvm::PointerUnion<FileEntry *, const MapEntry *> V;
+
+    MapValue() = delete;
+    MapValue(FileEntry &FE) : V(&FE) {}
+    MapValue(MapEntry &ME) : V(&ME) {}
+  };
+
 private:
-  StringRef Name;
-  const FileEntry *Entry;
+  friend class FileManager;
+
+  FileEntryRef() = delete;
+  explicit FileEntryRef(const MapEntry &Entry)
+      : Entry(&Entry) {
+    assert(Entry.second && "Expected payload");
+    assert(Entry.second->V && "Expected non-null");
+    assert(Entry.second->V.is<FileEntry *>() && "Expected FileEntry");
+  }
+
+  const MapEntry *Entry;
 };
 
 /// Cached information about one file (either on disk
@@ -110,7 +136,6 @@ class FileEntryRef {
 class FileEntry {
   friend class FileManager;
 
-  StringRef Name;             // Name of the file.
   std::string RealPathName;   // Real path to the file; could be empty.
   off_t Size;                 // File size in bytes.
   time_t ModTime;             // Modification time of file.
@@ -123,6 +148,14 @@ class FileEntry {
   /// The open file, if it is owned by the \p FileEntry.
   mutable std::unique_ptr<llvm::vfs::File> File;
 
+  // First access name for this FileEntry.
+  //
+  // This is Optional only to allow delayed construction (FileEntryRef has no
+  // default constructor). It should always have a value in practice.
+  //
+  // TODO: remote this once everyone that needs a name uses FileEntryRef.
+  Optional<FileEntryRef> LastRef;
+
 public:
   FileEntry()
       : UniqueID(0, 0), IsNamedPipe(false), IsValid(false)
@@ -131,7 +164,9 @@ class FileEntry {
   FileEntry(const FileEntry &) = delete;
   FileEntry &operator=(const FileEntry &) = delete;
 
-  StringRef getName() const { return Name; }
+  StringRef getName() const { return LastRef->getName(); }
+  FileEntryRef getLastRef() const { return *LastRef; }
+
   StringRef tryGetRealPathName() const { return RealPathName; }
   bool isValid() const { return IsValid; }
   off_t getSize() const { return Size; }
@@ -212,26 +247,21 @@ class FileManager : public RefCountedBase<FileManager> {
   llvm::StringMap<llvm::ErrorOr<DirectoryEntry &>, llvm::BumpPtrAllocator>
   SeenDirEntries;
 
-  /// A reference to the file entry that is associated with a particular
-  /// filename, or a reference to another filename that should be looked up
-  /// instead of the accessed filename.
-  ///
-  /// The reference to another filename is specifically useful for Redirecting
-  /// VFSs that use external names. In that case, the \c FileEntryRef returned
-  /// by the \c FileManager will have the external name, and not the name that
-  /// was used to lookup the file.
-  using SeenFileEntryOrRedirect =
-      llvm::PointerUnion<FileEntry *, const StringRef *>;
-
   /// A cache that maps paths to file entries (either real or
   /// virtual) we have looked up, or an error that occurred when we looked up
   /// the file.
   ///
   /// \see SeenDirEntries
-  llvm::StringMap<llvm::ErrorOr<SeenFileEntryOrRedirect>,
-                  llvm::BumpPtrAllocator>
+  llvm::StringMap<llvm::ErrorOr<FileEntryRef::MapValue>, llvm::BumpPtrAllocator>
       SeenFileEntries;
 
+  /// A mirror of SeenFileEntries to give fake answers for getBypassFile().
+  ///
+  /// Don't bother hooking up a BumpPtrAllocator. This should be rarely used,
+  /// and only on error paths.
+  std::unique_ptr<llvm::StringMap<llvm::ErrorOr<FileEntryRef::MapValue>>>
+      SeenBypassFileEntries;
+
   /// The canonical names of files and directories .
   llvm::DenseMap<const void *, llvm::StringRef> CanonicalNames;
 

diff  --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index e92e9d5911c0..d26ead4a5b09 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -212,11 +212,10 @@ FileManager::getFileRef(StringRef Filename, bool openFile, bool CacheFailure) {
           SeenFileInsertResult.first->second.getError());
     // Construct and return and FileEntryRef, unless it's a redirect to another
     // filename.
-    SeenFileEntryOrRedirect Value = *SeenFileInsertResult.first->second;
-    FileEntry *FE;
-    if (LLVM_LIKELY(FE = Value.dyn_cast<FileEntry *>()))
-      return FileEntryRef(SeenFileInsertResult.first->first(), *FE);
-    return getFileRef(*Value.get<const StringRef *>(), openFile, CacheFailure);
+    FileEntryRef::MapValue Value = *SeenFileInsertResult.first->second;
+    if (LLVM_LIKELY(Value.V.is<FileEntry *>()))
+      return FileEntryRef(*SeenFileInsertResult.first);
+    return FileEntryRef(*Value.V.get<const FileEntryRef::MapEntry *>());
   }
 
   // We've not seen this before. Fill it in.
@@ -268,26 +267,29 @@ FileManager::getFileRef(StringRef Filename, bool openFile, bool CacheFailure) {
   // This occurs when one dir is symlinked to another, for example.
   FileEntry &UFE = UniqueRealFiles[Status.getUniqueID()];
 
-  NamedFileEnt->second = &UFE;
-
-  // If the name returned by getStatValue is 
diff erent than Filename, re-intern
-  // the name.
-  if (Status.getName() != Filename) {
-    auto &NewNamedFileEnt =
-        *SeenFileEntries.insert({Status.getName(), &UFE}).first;
-    assert((*NewNamedFileEnt.second).get<FileEntry *>() == &UFE &&
+  if (Status.getName() == Filename) {
+    // The name matches. Set the FileEntry.
+    NamedFileEnt->second = FileEntryRef::MapValue(UFE);
+  } else {
+    // Name mismatch. We need a redirect. First grab the actual entry we want
+    // to return.
+    auto &Redirection =
+        *SeenFileEntries.insert({Status.getName(), FileEntryRef::MapValue(UFE)})
+             .first;
+    assert(Redirection.second->V.is<FileEntry *>() &&
+           "filename redirected to a non-canonical filename?");
+    assert(Redirection.second->V.get<FileEntry *>() == &UFE &&
            "filename from getStatValue() refers to wrong file");
-    InterndFileName = NewNamedFileEnt.first().data();
-    // In addition to re-interning the name, construct a redirecting seen file
-    // entry, that will point to the name the filesystem actually wants to use.
-    StringRef *Redirect = new (CanonicalNameStorage) StringRef(InterndFileName);
-    auto SeenFileInsertResultIt = SeenFileEntries.find(Filename);
-    assert(SeenFileInsertResultIt != SeenFileEntries.end() &&
-           "unexpected SeenFileEntries cache miss");
-    SeenFileInsertResultIt->second = Redirect;
-    NamedFileEnt = &*SeenFileInsertResultIt;
+
+    // Cache the redirection in the previously-inserted entry, still available
+    // in the tentative return value.
+    NamedFileEnt->second = FileEntryRef::MapValue(Redirection);
+
+    // Fix the tentative return value.
+    NamedFileEnt = &Redirection;
   }
 
+  FileEntryRef ReturnedRef(*NamedFileEnt);
   if (UFE.isValid()) { // Already have an entry with this inode, return it.
 
     // FIXME: this hack ensures that if we look up a file by a virtual path in
@@ -299,20 +301,20 @@ FileManager::getFileRef(StringRef Filename, bool openFile, bool CacheFailure) {
     if (DirInfo != UFE.Dir && Status.IsVFSMapped)
       UFE.Dir = DirInfo;
 
-    // Always update the name to use the last name by which a file was accessed.
-    // FIXME: Neither this nor always using the first name is correct; we want
-    // to switch towards a design where we return a FileName object that
+    // Always update LastRef to the last name by which a file was accessed.
+    // FIXME: Neither this nor always using the first reference is correct; we
+    // want to switch towards a design where we return a FileName object that
     // encapsulates both the name by which the file was accessed and the
     // corresponding FileEntry.
-    // FIXME: The Name should be removed from FileEntry once all clients
-    // adopt FileEntryRef.
-    UFE.Name = InterndFileName;
+    // FIXME: LastRef should be removed from FileEntry once all clients adopt
+    // FileEntryRef.
+    UFE.LastRef = ReturnedRef;
 
-    return FileEntryRef(InterndFileName, UFE);
+    return ReturnedRef;
   }
 
   // Otherwise, we don't have this file yet, add it.
-  UFE.Name    = InterndFileName;
+  UFE.LastRef = ReturnedRef;
   UFE.Size    = Status.getSize();
   UFE.ModTime = llvm::sys::toTimeT(Status.getLastModificationTime());
   UFE.Dir     = DirInfo;
@@ -329,7 +331,7 @@ FileManager::getFileRef(StringRef Filename, bool openFile, bool CacheFailure) {
     // We should still fill the path even if we aren't opening the file.
     fillRealPathName(&UFE, InterndFileName);
   }
-  return FileEntryRef(InterndFileName, UFE);
+  return ReturnedRef;
 }
 
 const FileEntry *
@@ -341,12 +343,12 @@ FileManager::getVirtualFile(StringRef Filename, off_t Size,
   auto &NamedFileEnt = *SeenFileEntries.insert(
       {Filename, std::errc::no_such_file_or_directory}).first;
   if (NamedFileEnt.second) {
-    SeenFileEntryOrRedirect Value = *NamedFileEnt.second;
+    FileEntryRef::MapValue Value = *NamedFileEnt.second;
     FileEntry *FE;
-    if (LLVM_LIKELY(FE = Value.dyn_cast<FileEntry *>()))
+    if (LLVM_LIKELY(FE = Value.V.dyn_cast<FileEntry *>()))
       return FE;
-    return getVirtualFile(*Value.get<const StringRef *>(), Size,
-                          ModificationTime);
+    return &FileEntryRef(*Value.V.get<const FileEntryRef::MapEntry *>())
+                .getFileEntry();
   }
 
   // We've not seen this before, or the file is cached as non-existent.
@@ -372,7 +374,7 @@ FileManager::getVirtualFile(StringRef Filename, off_t Size,
       Status.getUser(), Status.getGroup(), Size,
       Status.getType(), Status.getPermissions());
 
-    NamedFileEnt.second = UFE;
+    NamedFileEnt.second = FileEntryRef::MapValue(*UFE);
 
     // If we had already opened this file, close it now so we don't
     // leak the descriptor. We're not going to use the file
@@ -381,6 +383,9 @@ FileManager::getVirtualFile(StringRef Filename, off_t Size,
       UFE->closeFile();
 
     // If we already have an entry with this inode, return it.
+    //
+    // FIXME: Surely this should add a reference by the new name, and return
+    // it instead...
     if (UFE->isValid())
       return UFE;
 
@@ -390,10 +395,10 @@ FileManager::getVirtualFile(StringRef Filename, off_t Size,
   } else {
     VirtualFileEntries.push_back(std::make_unique<FileEntry>());
     UFE = VirtualFileEntries.back().get();
-    NamedFileEnt.second = UFE;
+    NamedFileEnt.second = FileEntryRef::MapValue(*UFE);
   }
 
-  UFE->Name    = InterndFileName;
+  UFE->LastRef = FileEntryRef(NamedFileEnt);
   UFE->Size    = Size;
   UFE->ModTime = ModificationTime;
   UFE->Dir     = *DirInfo;
@@ -409,17 +414,30 @@ llvm::Optional<FileEntryRef> FileManager::getBypassFile(FileEntryRef VF) {
   if (getStatValue(VF.getName(), Status, /*isFile=*/true, /*F=*/nullptr))
     return None;
 
-  // Fill it in from the stat.
+  if (!SeenBypassFileEntries)
+    SeenBypassFileEntries = std::make_unique<
+        llvm::StringMap<llvm::ErrorOr<FileEntryRef::MapValue>>>();
+
+  // If we've already bypassed just use the existing one.
+  auto Insertion = SeenBypassFileEntries->insert(
+      {VF.getName(), std::errc::no_such_file_or_directory});
+  if (!Insertion.second)
+    return FileEntryRef(*Insertion.first);
+
+  // Fill in the new entry from the stat.
   BypassFileEntries.push_back(std::make_unique<FileEntry>());
   const FileEntry &VFE = VF.getFileEntry();
   FileEntry &BFE = *BypassFileEntries.back();
-  BFE.Name = VFE.getName();
+  Insertion.first->second = FileEntryRef::MapValue(BFE);
+  BFE.LastRef = FileEntryRef(*Insertion.first);
   BFE.Size = Status.getSize();
   BFE.Dir = VFE.Dir;
   BFE.ModTime = llvm::sys::toTimeT(Status.getLastModificationTime());
   BFE.UID = NextFileUID++;
   BFE.IsValid = true;
-  return FileEntryRef(VF.getName(), BFE);
+
+  // Save the entry in the bypass table and return.
+  return FileEntryRef(*Insertion.first);
 }
 
 bool FileManager::FixupRelativePath(SmallVectorImpl<char> &path) const {
@@ -534,13 +552,13 @@ void FileManager::GetUniqueIDMapping(
   UIDToFiles.resize(NextFileUID);
 
   // Map file entries
-  for (llvm::StringMap<llvm::ErrorOr<SeenFileEntryOrRedirect>,
+  for (llvm::StringMap<llvm::ErrorOr<FileEntryRef::MapValue>,
                        llvm::BumpPtrAllocator>::const_iterator
            FE = SeenFileEntries.begin(),
            FEEnd = SeenFileEntries.end();
        FE != FEEnd; ++FE)
-    if (llvm::ErrorOr<SeenFileEntryOrRedirect> Entry = FE->getValue()) {
-      if (const auto *FE = (*Entry).dyn_cast<FileEntry *>())
+    if (llvm::ErrorOr<FileEntryRef::MapValue> Entry = FE->getValue()) {
+      if (const auto *FE = Entry->V.dyn_cast<FileEntry *>())
         UIDToFiles[FE->getUID()] = FE;
     }
 

diff  --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 88f95d18ddf1..dde44715b9e4 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -702,7 +702,7 @@ const FileEntry *
 SourceManager::bypassFileContentsOverride(const FileEntry &File) {
   assert(isFileOverridden(&File));
   llvm::Optional<FileEntryRef> BypassFile =
-      FileMgr.getBypassFile(FileEntryRef(File.getName(), File));
+      FileMgr.getBypassFile(File.getLastRef());
 
   // If the file can't be found in the FS, give up.
   if (!BypassFile)

diff  --git a/clang/unittests/Basic/FileManagerTest.cpp b/clang/unittests/Basic/FileManagerTest.cpp
index dfb79b477be9..43680d5cfbed 100644
--- a/clang/unittests/Basic/FileManagerTest.cpp
+++ b/clang/unittests/Basic/FileManagerTest.cpp
@@ -285,9 +285,18 @@ TEST_F(FileManagerTest, getFileRefReturnsCorrectNameForDifferentStatPath) {
   StatCache->InjectFile("dir/f1-alias.cpp", 41, "dir/f1.cpp");
   StatCache->InjectFile("dir/f2.cpp", 42);
   StatCache->InjectFile("dir/f2-alias.cpp", 42, "dir/f2.cpp");
+
+  // This unintuitive rename-the-file-on-stat behaviour supports how the
+  // RedirectingFileSystem VFS layer responds to stats. However, even if you
+  // have two layers, you should only get a single filename back. As such the
+  // following stat cache behaviour is not supported (the correct stat entry
+  // for a double-redirection would be "dir/f1.cpp") and the getFileRef below
+  // should assert.
+  StatCache->InjectFile("dir/f1-alias-alias.cpp", 41, "dir/f1-alias.cpp");
+
   manager.setStatCache(std::move(StatCache));
 
-  // With F2, test accessing the non-redirected name first.
+  // With F1, test accessing the non-redirected name first.
   auto F1 = manager.getFileRef("dir/f1.cpp");
   auto F1Alias = manager.getFileRef("dir/f1-alias.cpp");
   auto F1Alias2 = manager.getFileRef("dir/f1-alias.cpp");
@@ -301,6 +310,11 @@ TEST_F(FileManagerTest, getFileRefReturnsCorrectNameForDifferentStatPath) {
   EXPECT_EQ(&F1->getFileEntry(), &F1Alias->getFileEntry());
   EXPECT_EQ(&F1->getFileEntry(), &F1Alias2->getFileEntry());
 
+#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
+  EXPECT_DEATH((void)manager.getFileRef("dir/f1-alias-alias.cpp"),
+               "filename redirected to a non-canonical filename?");
+#endif
+
   // With F2, test accessing the redirected name first.
   auto F2Alias = manager.getFileRef("dir/f2-alias.cpp");
   auto F2 = manager.getFileRef("dir/f2.cpp");
@@ -487,29 +501,34 @@ TEST_F(FileManagerTest, getBypassFile) {
   // Set up a virtual file with a 
diff erent size than FakeStatCache uses.
   const FileEntry *File = Manager.getVirtualFile("/tmp/test", /*Size=*/10, 0);
   ASSERT_TRUE(File);
-  FileEntryRef Ref("/tmp/test", *File);
-  EXPECT_TRUE(Ref.isValid());
-  EXPECT_EQ(Ref.getSize(), 10);
+  const FileEntry &FE = *File;
+  EXPECT_TRUE(FE.isValid());
+  EXPECT_EQ(FE.getSize(), 10);
 
   // Calling a second time should not affect the UID or size.
-  unsigned VirtualUID = Ref.getUID();
-  EXPECT_EQ(*expectedToOptional(Manager.getFileRef("/tmp/test")), Ref);
-  EXPECT_EQ(Ref.getUID(), VirtualUID);
-  EXPECT_EQ(Ref.getSize(), 10);
+  unsigned VirtualUID = FE.getUID();
+  EXPECT_EQ(
+      &FE,
+      &expectedToOptional(Manager.getFileRef("/tmp/test"))->getFileEntry());
+  EXPECT_EQ(FE.getUID(), VirtualUID);
+  EXPECT_EQ(FE.getSize(), 10);
 
   // Bypass the file.
-  llvm::Optional<FileEntryRef> BypassRef = Manager.getBypassFile(Ref);
+  llvm::Optional<FileEntryRef> BypassRef =
+      Manager.getBypassFile(File->getLastRef());
   ASSERT_TRUE(BypassRef);
   EXPECT_TRUE(BypassRef->isValid());
-  EXPECT_EQ(BypassRef->getName(), Ref.getName());
+  EXPECT_EQ("/tmp/test", BypassRef->getName());
 
   // Check that it's 
diff erent in the right ways.
   EXPECT_NE(&BypassRef->getFileEntry(), File);
   EXPECT_NE(BypassRef->getUID(), VirtualUID);
-  EXPECT_NE(BypassRef->getSize(), Ref.getSize());
+  EXPECT_NE(BypassRef->getSize(), FE.getSize());
 
   // The virtual file should still be returned when searching.
-  EXPECT_EQ(*expectedToOptional(Manager.getFileRef("/tmp/test")), Ref);
+  EXPECT_EQ(
+      &FE,
+      &expectedToOptional(Manager.getFileRef("/tmp/test"))->getFileEntry());
 }
 
 } // anonymous namespace


        


More information about the cfe-commits mailing list