[clang] 10c0eca - [clang][Lexer] Speed up HeaderSearch when there are many HeaderMaps

Troy Johnson via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 18 08:21:46 PDT 2022


Author: Troy Johnson
Date: 2022-10-18T08:21:14-07:00
New Revision: 10c0eca25523e3bf1872522bde4693aaa55513e0

URL: https://github.com/llvm/llvm-project/commit/10c0eca25523e3bf1872522bde4693aaa55513e0
DIFF: https://github.com/llvm/llvm-project/commit/10c0eca25523e3bf1872522bde4693aaa55513e0.diff

LOG: [clang][Lexer] Speed up HeaderSearch when there are many HeaderMaps

HeaderSearch already uses a caching system to avoid duplicate searches,
but the initial cold miss can take a long time if a build system has
supplied thousands of HeaderMaps. For this case, the SearchDirs vector
begins with those HeaderMaps, so a cache miss requires testing if the
sought filename is present in each of those maps. Instead, we can
consolidate the keys of those HeaderMaps into one StringMap and then
each cache miss can skip directly to the correct HeaderMap or continue
its search beyond the initial sequence of HeaderMaps. In testing on TUs
with ~15000 SearchDirs where the initial 99% are HeaderMaps, time spent
in Clang was reduced by 15%. This patch is expected to be neutral for
SearchDir vectors that do not begin with HeaderMaps.

Differential Revision: https://reviews.llvm.org/D135801

Added: 
    

Modified: 
    clang/include/clang/Lex/HeaderMap.h
    clang/include/clang/Lex/HeaderSearch.h
    clang/lib/Lex/HeaderSearch.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Lex/HeaderMap.h b/clang/include/clang/Lex/HeaderMap.h
index ca6a49bae3bfe..7930cb83598ca 100644
--- a/clang/include/clang/Lex/HeaderMap.h
+++ b/clang/include/clang/Lex/HeaderMap.h
@@ -15,6 +15,7 @@
 
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/LLVM.h"
+#include "clang/Lex/HeaderMapTypes.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/Compiler.h"
@@ -39,6 +40,21 @@ class HeaderMapImpl {
   // Check for a valid header and extract the byte swap.
   static bool checkHeader(const llvm::MemoryBuffer &File, bool &NeedsByteSwap);
 
+  // Make a call for every Key in the map.
+  template <typename Func> void forEachKey(Func Callback) const {
+    const HMapHeader &Hdr = getHeader();
+    unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
+
+    for (unsigned Bucket = 0; Bucket < NumBuckets; ++Bucket) {
+      HMapBucket B = getBucket(Bucket);
+      if (B.Key != HMAP_EmptyBucketKey) {
+        Optional<StringRef> Key = getString(B.Key);
+        if (Key)
+          Callback(Key.value());
+      }
+    }
+  }
+
   /// If the specified relative filename is located in this HeaderMap return
   /// the filename it is mapped to, otherwise return an empty StringRef.
   StringRef lookupFilename(StringRef Filename,
@@ -78,6 +94,7 @@ class HeaderMap : private HeaderMapImpl {
                                            FileManager &FM);
 
   using HeaderMapImpl::dump;
+  using HeaderMapImpl::forEachKey;
   using HeaderMapImpl::getFileName;
   using HeaderMapImpl::lookupFilename;
   using HeaderMapImpl::reverseLookupFilename;

diff  --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h
index d4b2306096972..4684f554dc24d 100644
--- a/clang/include/clang/Lex/HeaderSearch.h
+++ b/clang/include/clang/Lex/HeaderSearch.h
@@ -249,6 +249,14 @@ class HeaderSearch {
   unsigned SystemDirIdx = 0;
   bool NoCurDirSearch = false;
 
+  /// Maps HeaderMap keys to SearchDir indices. When HeaderMaps are used
+  /// heavily, SearchDirs can start with thousands of HeaderMaps, so this Index
+  /// lets us avoid scanning them all to find a match.
+  llvm::StringMap<unsigned, llvm::BumpPtrAllocator> SearchDirHeaderMapIndex;
+
+  /// The index of the first SearchDir that isn't a header map.
+  unsigned FirstNonHeaderMapSearchDirIdx = 0;
+
   /// \#include prefixes for which the 'system header' property is
   /// overridden.
   ///
@@ -330,6 +338,10 @@ class HeaderSearch {
   /// Entity used to look up stored header file information.
   ExternalHeaderFileInfoSource *ExternalSource = nullptr;
 
+  /// Scan all of the header maps at the beginning of SearchDirs and
+  /// map their keys to the SearchDir index of their header map.
+  void indexInitialHeaderMaps();
+
 public:
   HeaderSearch(std::shared_ptr<HeaderSearchOptions> HSOpts,
                SourceManager &SourceMgr, DiagnosticsEngine &Diags,
@@ -801,6 +813,10 @@ class HeaderSearch {
   }
 
   ConstSearchDirIterator search_dir_begin() const { return quoted_dir_begin(); }
+  ConstSearchDirIterator search_dir_nth(size_t n) const {
+    assert(n < SearchDirs.size());
+    return {*this, n};
+  }
   ConstSearchDirIterator search_dir_end() const { return system_dir_end(); }
   ConstSearchDirRange search_dir_range() const {
     return {search_dir_begin(), search_dir_end()};

diff  --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index bd69d8f697de8..dd82f4280edb5 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -116,6 +116,7 @@ void HeaderSearch::SetSearchPaths(
   NoCurDirSearch = noCurDirSearch;
   SearchDirToHSEntry = std::move(searchDirToHSEntry);
   //LookupFileCache.clear();
+  indexInitialHeaderMaps();
 }
 
 void HeaderSearch::AddSearchPath(const DirectoryLookup &dir, bool isAngled) {
@@ -372,6 +373,29 @@ Module *HeaderSearch::lookupModule(StringRef ModuleName, StringRef SearchName,
   return Module;
 }
 
+void HeaderSearch::indexInitialHeaderMaps() {
+  llvm::StringMap<unsigned, llvm::BumpPtrAllocator> Index(SearchDirs.size());
+
+  // Iterate over all filename keys and associate them with the index i.
+  unsigned i = 0;
+  for (; i != SearchDirs.size(); ++i) {
+    auto &Dir = SearchDirs[i];
+
+    // We're concerned with only the initial contiguous run of header
+    // maps within SearchDirs, which can be 99% of SearchDirs when
+    // SearchDirs.size() is ~10000.
+    if (!Dir.isHeaderMap())
+      break;
+
+    // Give earlier keys precedence over identical later keys.
+    auto Callback = [&](StringRef Filename) { Index.try_emplace(Filename, i); };
+    Dir.getHeaderMap()->forEachKey(Callback);
+  }
+
+  SearchDirHeaderMapIndex = std::move(Index);
+  FirstNonHeaderMapSearchDirIdx = i;
+}
+
 //===----------------------------------------------------------------------===//
 // File lookup within a DirectoryLookup scope
 //===----------------------------------------------------------------------===//
@@ -977,24 +1001,37 @@ Optional<FileEntryRef> HeaderSearch::LookupFile(
 
   ConstSearchDirIterator NextIt = std::next(It);
 
-  // If the entry has been previously looked up, the first value will be
-  // non-zero.  If the value is equal to i (the start point of our search), then
-  // this is a matching hit.
-  if (!SkipCache && CacheLookup.StartIt == NextIt) {
-    // Skip querying potentially lots of directories for this lookup.
-    if (CacheLookup.HitIt)
-      It = CacheLookup.HitIt;
-    if (CacheLookup.MappedName) {
-      Filename = CacheLookup.MappedName;
-      if (IsMapped)
-        *IsMapped = true;
+  if (!SkipCache) {
+    if (CacheLookup.StartIt == NextIt) {
+      // HIT: Skip querying potentially lots of directories for this lookup.
+      if (CacheLookup.HitIt)
+        It = CacheLookup.HitIt;
+      if (CacheLookup.MappedName) {
+        Filename = CacheLookup.MappedName;
+        if (IsMapped)
+          *IsMapped = true;
+      }
+    } else {
+      // MISS: This is the first query, or the previous query didn't match
+      // our search start.  We will fill in our found location below, so prime
+      // the start point value.
+      CacheLookup.reset(/*NewStartIt=*/NextIt);
+
+      if (It == search_dir_begin() && FirstNonHeaderMapSearchDirIdx > 0) {
+        // Handle cold misses of user includes in the presence of many header
+        // maps.  We avoid searching perhaps thousands of header maps by
+        // jumping directly to the correct one or jumping beyond all of them.
+        auto Iter = SearchDirHeaderMapIndex.find(Filename);
+        if (Iter == SearchDirHeaderMapIndex.end())
+          // Not in index => Skip to first SearchDir after initial header maps
+          It = search_dir_nth(FirstNonHeaderMapSearchDirIdx);
+        else
+          // In index => Start with a specific header map
+          It = search_dir_nth(Iter->second);
+      }
     }
-  } else {
-    // Otherwise, this is the first query, or the previous query didn't match
-    // our search start.  We will fill in our found location below, so prime the
-    // start point value.
+  } else
     CacheLookup.reset(/*NewStartIt=*/NextIt);
-  }
 
   SmallString<64> MappedName;
 


        


More information about the cfe-commits mailing list