[llvm] 9d8a3e7 - [llvm-cov] Cache file status information
    Vedant Kumar via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Wed Mar  3 10:08:28 PST 2021
    
    
  
Author: Choongwoo Han
Date: 2021-03-03T10:04:07-08:00
New Revision: 9d8a3e75b442ad7f5cdcb3c8d5b968d8189f46d2
URL: https://github.com/llvm/llvm-project/commit/9d8a3e75b442ad7f5cdcb3c8d5b968d8189f46d2
DIFF: https://github.com/llvm/llvm-project/commit/9d8a3e75b442ad7f5cdcb3c8d5b968d8189f46d2.diff
LOG: [llvm-cov] Cache file status information
Currently, getSourceFile accesses the file system to check whether two paths
refer to the same file while a thread lock is held, which is a huge performance
bottleneck in some cases. It currently accesses the file system size(files) * size(files) times.
Thus, cache the file status information, which reduces file system accesses to size(files) times.
When I tested it with two binaries and 16 CPU cores,
it saved over 70% of the time.
Binary 1: 56 secs -> 3 secs
Binary 2: 17 hours -> 4 hours
Differential Revision: https://reviews.llvm.org/D97061
Added: 
    
Modified: 
    llvm/tools/llvm-cov/CodeCoverage.cpp
Removed: 
    
################################################################################
diff  --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp
index baa968820b63..712766ec9929 100644
--- a/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -80,6 +80,12 @@ class CodeCoverageTool {
   /// directory, recursively collect all of the paths within the directory.
   void collectPaths(const std::string &Path);
 
+  /// Check if the two given files are the same file.
+  bool isEquivalentFile(StringRef FilePath1, StringRef FilePath2);
+
+  /// Retrieve a file status with a cache.
+  Optional<sys::fs::file_status> getFileStatus(StringRef FilePath);
+
   /// Return a memory buffer for the given source file.
   ErrorOr<const MemoryBuffer &> getSourceFile(StringRef SourceFile);
 
@@ -153,6 +159,9 @@ class CodeCoverageTool {
   /// remapped to, when using -path-equivalence.
   Optional<std::pair<std::string, std::string>> PathRemapping;
 
+  /// File status cache used when finding the same file.
+  StringMap<Optional<sys::fs::file_status>> FileStatusCache;
+
   /// The architecture the coverage mapping data targets.
   std::vector<StringRef> CoverageArches;
 
@@ -239,6 +248,27 @@ void CodeCoverageTool::collectPaths(const std::string &Path) {
   }
 }
 
+Optional<sys::fs::file_status>
+CodeCoverageTool::getFileStatus(StringRef FilePath) {
+  auto It = FileStatusCache.try_emplace(FilePath);
+  auto &CachedStatus = It.first->getValue();
+  if (!It.second)
+    return CachedStatus;
+
+  sys::fs::file_status Status;
+  if (!sys::fs::status(FilePath, Status))
+    CachedStatus = Status;
+  return CachedStatus;
+}
+
+bool CodeCoverageTool::isEquivalentFile(StringRef FilePath1,
+                                        StringRef FilePath2) {
+  auto Status1 = getFileStatus(FilePath1);
+  auto Status2 = getFileStatus(FilePath2);
+  return Status1.hasValue() && Status2.hasValue() &&
+         sys::fs::equivalent(Status1.getValue(), Status2.getValue());
+}
+
 ErrorOr<const MemoryBuffer &>
 CodeCoverageTool::getSourceFile(StringRef SourceFile) {
   // If we've remapped filenames, look up the real location for this file.
@@ -249,7 +279,7 @@ CodeCoverageTool::getSourceFile(StringRef SourceFile) {
       SourceFile = Loc->second;
   }
   for (const auto &Files : LoadedSourceFiles)
-    if (sys::fs::equivalent(SourceFile, Files.first))
+    if (isEquivalentFile(SourceFile, Files.first))
       return *Files.second;
   auto Buffer = MemoryBuffer::getFile(SourceFile);
   if (auto EC = Buffer.getError()) {
        
    
    
More information about the llvm-commits
mailing list