r269100 - [VFS] Reconstruct the VFS overlay tree for more accurate lookup

Bruno Cardoso Lopes via cfe-commits cfe-commits at lists.llvm.org
Tue May 10 11:43:00 PDT 2016


Author: bruno
Date: Tue May 10 13:43:00 2016
New Revision: 269100

URL: http://llvm.org/viewvc/llvm-project?rev=269100&view=rev
Log:
[VFS] Reconstruct the VFS overlay tree for more accurate lookup

The way we currently build the internal VFS overlay representation leads
to inefficient path search and might yield wrong answers when asked for
recursive or regular directory iteration.

Currently, when reading an YAML file, each YAML root entry is placed
inside a new root in the filesystem overlay. In the crash reproducer, a
simple "@import Foundation" currently maps to 43 roots, and when looking
up paths, we traverse a directory tree for each of these different
roots, until we find a match (or don't). This has two consequences:

- It's slow.
- Directory iteration gives incomplete results since it only return
results within one root - since contents of the same directory can be
declared inside different roots, the result isn't accurate.

This is in part fault of the way we currently write out the YAML file
when emitting the crash reproducer - we could generate only one root and
that would make it fast and correct again. However, we should not rely
on how the client writes the YAML, but provide a good internal
representation regardless.

This patch builds a proper virtual directory tree out of the YAML
representation, allowing faster search and proper iteration. Besides the
crash reproducer, this potentially benefits other VFS clients.

Modified:
    cfe/trunk/lib/Basic/VirtualFileSystem.cpp
    cfe/trunk/unittests/Basic/VirtualFileSystemTest.cpp

Modified: cfe/trunk/lib/Basic/VirtualFileSystem.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/VirtualFileSystem.cpp?rev=269100&r1=269099&r2=269100&view=diff
==============================================================================
--- cfe/trunk/lib/Basic/VirtualFileSystem.cpp (original)
+++ cfe/trunk/lib/Basic/VirtualFileSystem.cpp Tue May 10 13:43:00 2016
@@ -719,7 +719,13 @@ public:
                             Status S)
       : Entry(EK_Directory, Name), Contents(std::move(Contents)),
         S(std::move(S)) {}
+  RedirectingDirectoryEntry(StringRef Name, Status S)
+      : Entry(EK_Directory, Name), S(std::move(S)) {}
   Status getStatus() { return S; }
+  void addContent(std::unique_ptr<Entry> Content) {
+    Contents.push_back(std::move(Content));
+  }
+  Entry *getLastContent() const { return Contents.back().get(); }
   typedef decltype(Contents)::iterator iterator;
   iterator contents_begin() { return Contents.begin(); }
   iterator contents_end() { return Contents.end(); }
@@ -747,6 +753,7 @@ public:
     return UseName == NK_NotSet ? GlobalUseExternalName
                                 : (UseName == NK_External);
   }
+  NameKind getUseName() const { return UseName; }
   static bool classof(const Entry *E) { return E->getKind() == EK_File; }
 };
 
@@ -1023,6 +1030,70 @@ class RedirectingFileSystemParser {
     return true;
   }
 
+  Entry *lookupOrCreateEntry(RedirectingFileSystem *FS, StringRef Name,
+                             Entry *ParentEntry = nullptr) {
+    if (!ParentEntry) { // Look for a existent root
+      for (const std::unique_ptr<Entry> &Root : FS->Roots) {
+        if (Name.equals(Root->getName())) {
+          ParentEntry = Root.get();
+          return ParentEntry;
+        }
+      }
+    } else { // Advance to the next component
+      auto *DE = dyn_cast<RedirectingDirectoryEntry>(ParentEntry);
+      for (std::unique_ptr<Entry> &Content :
+           llvm::make_range(DE->contents_begin(), DE->contents_end())) {
+        auto *DirContent = dyn_cast<RedirectingDirectoryEntry>(Content.get());
+        if (DirContent && Name.equals(Content->getName()))
+          return DirContent;
+      }
+    }
+
+    // ... or create a new one
+    std::unique_ptr<Entry> E = llvm::make_unique<RedirectingDirectoryEntry>(
+        Name, Status("", getNextVirtualUniqueID(), sys::TimeValue::now(), 0, 0,
+                     0, file_type::directory_file, sys::fs::all_all));
+
+    if (!ParentEntry) { // Add a new root to the overlay
+      FS->Roots.push_back(std::move(E));
+      ParentEntry = FS->Roots.back().get();
+      return ParentEntry;
+    }
+
+    auto *DE = dyn_cast<RedirectingDirectoryEntry>(ParentEntry);
+    DE->addContent(std::move(E));
+    return DE->getLastContent();
+  }
+
+  void uniqueOverlayTree(RedirectingFileSystem *FS, Entry *SrcE,
+                         Entry *NewParentE = nullptr) {
+    StringRef Name = SrcE->getName();
+    switch (SrcE->getKind()) {
+    case EK_Directory: {
+      auto *DE = dyn_cast<RedirectingDirectoryEntry>(SrcE);
+      assert(DE && "Must be a directory");
+      // Empty directories could be present in the YAML as a way to
+      // describe a file for a current directory after some of its subdir
+      // is parsed. This only leads to redundant walks, ignore it.
+      if (!Name.empty())
+        NewParentE = lookupOrCreateEntry(FS, Name, NewParentE);
+      for (std::unique_ptr<Entry> &SubEntry :
+           llvm::make_range(DE->contents_begin(), DE->contents_end()))
+        uniqueOverlayTree(FS, SubEntry.get(), NewParentE);
+      break;
+    }
+    case EK_File: {
+      auto *FE = dyn_cast<RedirectingFileEntry>(SrcE);
+      assert(FE && "Must be a file");
+      assert(NewParentE && "Parent entry must exist");
+      auto *DE = dyn_cast<RedirectingDirectoryEntry>(NewParentE);
+      DE->addContent(llvm::make_unique<RedirectingFileEntry>(
+          Name, FE->getExternalContentsPath(), FE->getUseName()));
+      break;
+    }
+    }
+  }
+
   std::unique_ptr<Entry> parseEntry(yaml::Node *N, RedirectingFileSystem *FS) {
     yaml::MappingNode *M = dyn_cast<yaml::MappingNode>(N);
     if (!M) {
@@ -1225,6 +1296,7 @@ public:
     };
 
     DenseMap<StringRef, KeyStatus> Keys(std::begin(Fields), std::end(Fields));
+    std::vector<std::unique_ptr<Entry>> RootEntries;
 
     // Parse configuration and 'roots'
     for (yaml::MappingNode::iterator I = Top->begin(), E = Top->end(); I != E;
@@ -1247,7 +1319,7 @@ public:
         for (yaml::SequenceNode::iterator I = Roots->begin(), E = Roots->end();
              I != E; ++I) {
           if (std::unique_ptr<Entry> E = parseEntry(&*I, FS))
-            FS->Roots.push_back(std::move(E));
+            RootEntries.push_back(std::move(E));
           else
             return false;
         }
@@ -1288,6 +1360,13 @@ public:
 
     if (!checkMissingKeys(Top, Keys))
       return false;
+
+    // Now that we sucessefully parsed the YAML file, canonicalize the internal
+    // representation to a proper directory tree so that we can search faster
+    // inside the VFS.
+    for (std::unique_ptr<Entry> &E : RootEntries)
+      uniqueOverlayTree(FS, E.get());
+
     return true;
   }
 };

Modified: cfe/trunk/unittests/Basic/VirtualFileSystemTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Basic/VirtualFileSystemTest.cpp?rev=269100&r1=269099&r2=269100&view=diff
==============================================================================
--- cfe/trunk/unittests/Basic/VirtualFileSystemTest.cpp (original)
+++ cfe/trunk/unittests/Basic/VirtualFileSystemTest.cpp Tue May 10 13:43:00 2016
@@ -1022,9 +1022,14 @@ TEST_F(VFSFromYAMLTest, DirectoryIterati
   Lower->addDirectory("//root/");
   Lower->addDirectory("//root/foo");
   Lower->addDirectory("//root/foo/bar");
+  Lower->addDirectory("//root/zab");
+  Lower->addDirectory("//root/baz");
   Lower->addRegularFile("//root/foo/bar/a");
   Lower->addRegularFile("//root/foo/bar/b");
   Lower->addRegularFile("//root/file3");
+  Lower->addRegularFile("//root/zab/a");
+  Lower->addRegularFile("//root/zab/b");
+  Lower->addRegularFile("//root/baz/c");
   IntrusiveRefCntPtr<vfs::FileSystem> FS =
   getFromYAMLString("{ 'use-external-names': false,\n"
                     "  'roots': [\n"
@@ -1042,6 +1047,26 @@ TEST_F(VFSFromYAMLTest, DirectoryIterati
                     "                  'external-contents': '//root/foo/bar/b'\n"
                     "                }\n"
                     "              ]\n"
+                    "},\n"
+                    "{\n"
+                    "  'type': 'directory',\n"
+                    "  'name': '//root/baz',\n"
+                    "  'contents': [ {\n"
+                    "                  'type': 'file',\n"
+                    "                  'name': 'x',\n"
+                    "                  'external-contents': '//root/zab/a'\n"
+                    "                }\n"
+                    "              ]\n"
+                    "},\n"
+                    "{\n"
+                    "  'type': 'directory',\n"
+                    "  'name': '//root/baz',\n"
+                    "  'contents': [ {\n"
+                    "                  'type': 'file',\n"
+                    "                  'name': 'y',\n"
+                    "                  'external-contents': '//root/zab/b'\n"
+                    "                }\n"
+                    "              ]\n"
                     "}\n"
                     "]\n"
                     "}",
@@ -1054,8 +1079,12 @@ TEST_F(VFSFromYAMLTest, DirectoryIterati
 
   std::error_code EC;
   checkContents(O->dir_begin("//root/", EC),
-                {"//root/file1", "//root/file2", "//root/file3", "//root/foo"});
+                {"//root/file1", "//root/file2", "//root/baz", "//root/file3",
+                 "//root/foo", "//root/zab"});
 
   checkContents(O->dir_begin("//root/foo/bar", EC),
                 {"//root/foo/bar/a", "//root/foo/bar/b"});
+
+  checkContents(O->dir_begin("//root/baz", EC),
+                {"//root/baz/x", "//root/baz/y", "//root/baz/c"});
 }




More information about the cfe-commits mailing list