[llvm] 537344f - [clang][modules] Move `SLocEntry` search into `ASTReader` (#66966)

via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 6 14:52:25 PDT 2023


Author: Jan Svoboda
Date: 2023-10-06T14:52:19-07:00
New Revision: 537344fc502474545d332bf5592db257cc568250

URL: https://github.com/llvm/llvm-project/commit/537344fc502474545d332bf5592db257cc568250
DIFF: https://github.com/llvm/llvm-project/commit/537344fc502474545d332bf5592db257cc568250.diff

LOG: [clang][modules] Move `SLocEntry` search into `ASTReader` (#66966)

In `SourceManager::getFileID()`, Clang performs binary search over its
buffer of `SLocEntries`. For modules, this binary search fully
deserializes the entire `SLocEntry` block for each visited entry. For
some entries, that includes decompressing the associated buffer (e.g.
the predefines buffer, macro expansion buffers, contents of volatile
files), which shows up in profiles of the dependency scanner.

This patch moves the binary search over loaded entries into `ASTReader`,
which can perform cheaper partial deserialization during the binary
search, reducing the wall time of dependency scans by ~3%. This also
reduces the number of retired instructions by ~1.4% on regular
(implicit) modules compilation.

Note that this patch drops the optimizations based on the last lookup ID
(pruning the search space and performing linear search before resorting
to the full binary search). Instead, it reduces the search space by
asking `ASTReader::GlobalSLocOffsetMap` for the containing `ModuleFile`
and only does binary search over entries of single module file.

Added: 
    

Modified: 
    clang/include/clang/Basic/SourceManager.h
    clang/include/clang/Serialization/ASTReader.h
    clang/lib/Basic/SourceManager.cpp
    clang/lib/Serialization/ASTReader.cpp
    clang/test/Modules/explicit-build-missing-files.cpp
    llvm/include/llvm/ADT/STLExtras.h

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index 94aeda8ce58b94d..420fda111ad1369 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -500,6 +500,14 @@ class SLocEntry {
     return Expansion;
   }
 
+  /// Creates an incomplete SLocEntry that is only able to report its offset.
+  static SLocEntry getOffsetOnly(SourceLocation::UIntTy Offset) {
+    assert(!(Offset & (1ULL << OffsetBits)) && "Offset is too large");
+    SLocEntry E;
+    E.Offset = Offset;
+    return E;
+  }
+
   static SLocEntry get(SourceLocation::UIntTy Offset, const FileInfo &FI) {
     assert(!(Offset & (1ULL << OffsetBits)) && "Offset is too large");
     SLocEntry E;
@@ -534,6 +542,12 @@ class ExternalSLocEntrySource {
   /// entry from being loaded.
   virtual bool ReadSLocEntry(int ID) = 0;
 
+  /// Get the index ID for the loaded SourceLocation offset.
+  ///
+  /// \returns Invalid index ID (0) if an error occurred that prevented the
+  /// SLocEntry  from being loaded.
+  virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0;
+
   /// Retrieve the module import location and name for the given ID, if
   /// in fact it was loaded from a module (rather than, say, a precompiled
   /// header).
@@ -729,6 +743,12 @@ class SourceManager : public RefCountedBase<SourceManager> {
   /// Same indexing as LoadedSLocEntryTable.
   llvm::BitVector SLocEntryLoaded;
 
+  /// A bitmap that indicates whether the entries of LoadedSLocEntryTable
+  /// have already had their offset loaded from the external source.
+  ///
+  /// Superset of SLocEntryLoaded. Same indexing as SLocEntryLoaded.
+  llvm::BitVector SLocEntryOffsetLoaded;
+
   /// An external source for source location entries.
   ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
 

diff  --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index 3468b0676956024..531ad94f0906ac0 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -2154,6 +2154,12 @@ class ASTReader
 
   /// Read the source location entry with index ID.
   bool ReadSLocEntry(int ID) override;
+  /// Get the index ID for the loaded SourceLocation offset.
+  int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override;
+  /// Try to read the offset of the SLocEntry at the given index in the given
+  /// module file.
+  llvm::Expected<SourceLocation::UIntTy> readSLocOffset(ModuleFile *F,
+                                                        unsigned Index);
 
   /// Retrieve the module import location and module name for the
   /// given source manager entry ID.

diff  --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index b8e7650accdbfa0..4ce89267e03f427 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -337,6 +337,7 @@ void SourceManager::clearIDTables() {
   LocalSLocEntryTable.clear();
   LoadedSLocEntryTable.clear();
   SLocEntryLoaded.clear();
+  SLocEntryOffsetLoaded.clear();
   LastLineNoFileIDQuery = FileID();
   LastLineNoContentCache = nullptr;
   LastFileIDLookup = FileID();
@@ -459,6 +460,7 @@ SourceManager::AllocateLoadedSLocEntries(unsigned NumSLocEntries,
   }
   LoadedSLocEntryTable.resize(LoadedSLocEntryTable.size() + NumSLocEntries);
   SLocEntryLoaded.resize(LoadedSLocEntryTable.size());
+  SLocEntryOffsetLoaded.resize(LoadedSLocEntryTable.size());
   CurrentLoadedOffset -= TotalSize;
   int BaseID = -int(LoadedSLocEntryTable.size()) - 1;
   LoadedSLocEntryAllocBegin.push_back(FileID::get(BaseID));
@@ -597,7 +599,7 @@ FileID SourceManager::createFileIDImpl(ContentCache &File, StringRef Filename,
     assert(!SLocEntryLoaded[Index] && "FileID already loaded");
     LoadedSLocEntryTable[Index] = SLocEntry::get(
         LoadedOffset, FileInfo::get(IncludePos, File, FileCharacter, Filename));
-    SLocEntryLoaded[Index] = true;
+    SLocEntryLoaded[Index] = SLocEntryOffsetLoaded[Index] = true;
     return FileID::get(LoadedID);
   }
   unsigned FileSize = File.getSize();
@@ -657,7 +659,7 @@ SourceManager::createExpansionLocImpl(const ExpansionInfo &Info,
     assert(Index < LoadedSLocEntryTable.size() && "FileID out of range");
     assert(!SLocEntryLoaded[Index] && "FileID already loaded");
     LoadedSLocEntryTable[Index] = SLocEntry::get(LoadedOffset, Info);
-    SLocEntryLoaded[Index] = true;
+    SLocEntryLoaded[Index] = SLocEntryOffsetLoaded[Index] = true;
     return SourceLocation::getMacroLoc(LoadedOffset);
   }
   LocalSLocEntryTable.push_back(SLocEntry::get(NextLocalOffset, Info));
@@ -858,69 +860,7 @@ FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const {
     return FileID();
   }
 
-  // Essentially the same as the local case, but the loaded array is sorted
-  // in the other direction (decreasing order).
-  // GreaterIndex is the one where the offset is greater, which is actually a
-  // lower index!
-  unsigned GreaterIndex = 0;
-  unsigned LessIndex = LoadedSLocEntryTable.size();
-  if (LastFileIDLookup.ID < 0) {
-    // Prune the search space.
-    int LastID = LastFileIDLookup.ID;
-    if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset)
-      GreaterIndex =
-          (-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache
-    else
-      LessIndex = -LastID - 2;
-  }
-
-  // First do a linear scan from the last lookup position, if possible.
-  unsigned NumProbes;
-  bool Invalid = false;
-  for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) {
-    // Make sure the entry is loaded!
-    const SrcMgr::SLocEntry &E = getLoadedSLocEntry(GreaterIndex, &Invalid);
-    if (Invalid)
-      return FileID(); // invalid entry.
-    if (E.getOffset() <= SLocOffset) {
-      FileID Res = FileID::get(-int(GreaterIndex) - 2);
-      LastFileIDLookup = Res;
-      NumLinearScans += NumProbes + 1;
-      return Res;
-    }
-  }
-
-  // Linear scan failed. Do the binary search.
-  NumProbes = 0;
-  while (true) {
-    ++NumProbes;
-    unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex;
-    const SrcMgr::SLocEntry &E = getLoadedSLocEntry(MiddleIndex, &Invalid);
-    if (Invalid)
-      return FileID(); // invalid entry.
-
-    if (E.getOffset() > SLocOffset) {
-      if (GreaterIndex == MiddleIndex) {
-        assert(0 && "binary search missed the entry");
-        return FileID();
-      }
-      GreaterIndex = MiddleIndex;
-      continue;
-    }
-
-    if (isOffsetInFileID(FileID::get(-int(MiddleIndex) - 2), SLocOffset)) {
-      FileID Res = FileID::get(-int(MiddleIndex) - 2);
-      LastFileIDLookup = Res;
-      NumBinaryProbes += NumProbes;
-      return Res;
-    }
-
-    if (LessIndex == MiddleIndex) {
-      assert(0 && "binary search missed the entry");
-      return FileID();
-    }
-    LessIndex = MiddleIndex;
-  }
+  return FileID::get(ExternalSLocEntries->getSLocEntryID(SLocOffset));
 }
 
 SourceLocation SourceManager::

diff  --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index b4d5325601e59c1..9545e6a99341eff 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1444,6 +1444,79 @@ llvm::Error ASTReader::ReadSourceManagerBlock(ModuleFile &F) {
   }
 }
 
+llvm::Expected<SourceLocation::UIntTy>
+ASTReader::readSLocOffset(ModuleFile *F, unsigned Index) {
+  BitstreamCursor &Cursor = F->SLocEntryCursor;
+  SavedStreamPosition SavedPosition(Cursor);
+  if (llvm::Error Err = Cursor.JumpToBit(F->SLocEntryOffsetsBase +
+                                         F->SLocEntryOffsets[Index]))
+    return Err;
+
+  Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
+  if (!MaybeEntry)
+    return MaybeEntry.takeError();
+
+  llvm::BitstreamEntry Entry = MaybeEntry.get();
+  if (Entry.Kind != llvm::BitstreamEntry::Record)
+    return llvm::createStringError(
+        std::errc::illegal_byte_sequence,
+        "incorrectly-formatted source location entry in AST file");
+
+  RecordData Record;
+  StringRef Blob;
+  Expected<unsigned> MaybeSLOC = Cursor.readRecord(Entry.ID, Record, &Blob);
+  if (!MaybeSLOC)
+    return MaybeSLOC.takeError();
+
+  switch (MaybeSLOC.get()) {
+  default:
+    return llvm::createStringError(
+        std::errc::illegal_byte_sequence,
+        "incorrectly-formatted source location entry in AST file");
+  case SM_SLOC_FILE_ENTRY:
+  case SM_SLOC_BUFFER_ENTRY:
+  case SM_SLOC_EXPANSION_ENTRY:
+    return F->SLocEntryBaseOffset + Record[0];
+  }
+}
+
+int ASTReader::getSLocEntryID(SourceLocation::UIntTy SLocOffset) {
+  auto SLocMapI =
+      GlobalSLocOffsetMap.find(SourceManager::MaxLoadedOffset - SLocOffset - 1);
+  assert(SLocMapI != GlobalSLocOffsetMap.end() &&
+         "Corrupted global sloc offset map");
+  ModuleFile *F = SLocMapI->second;
+
+  bool Invalid = false;
+
+  auto It = llvm::upper_bound(
+      llvm::index_range(0, F->LocalNumSLocEntries), SLocOffset,
+      [&](SourceLocation::UIntTy Offset, std::size_t LocalIndex) {
+        int ID = F->SLocEntryBaseID + LocalIndex;
+        std::size_t Index = -ID - 2;
+        if (!SourceMgr.SLocEntryOffsetLoaded[Index]) {
+          assert(!SourceMgr.SLocEntryLoaded[Index]);
+          auto MaybeEntryOffset = readSLocOffset(F, LocalIndex);
+          if (!MaybeEntryOffset) {
+            Error(MaybeEntryOffset.takeError());
+            Invalid = true;
+            return true;
+          }
+          SourceMgr.LoadedSLocEntryTable[Index] =
+              SrcMgr::SLocEntry::getOffsetOnly(*MaybeEntryOffset);
+          SourceMgr.SLocEntryOffsetLoaded[Index] = true;
+        }
+        return Offset < SourceMgr.LoadedSLocEntryTable[Index].getOffset();
+      });
+
+  if (Invalid)
+    return 0;
+
+  // The iterator points to the first entry with start offset greater than the
+  // offset of interest. The previous entry must contain the offset of interest.
+  return F->SLocEntryBaseID + *std::prev(It);
+}
+
 bool ASTReader::ReadSLocEntry(int ID) {
   if (ID == 0)
     return false;

diff  --git a/clang/test/Modules/explicit-build-missing-files.cpp b/clang/test/Modules/explicit-build-missing-files.cpp
index e36b5051e8319c8..3ea881d34c6b28e 100644
--- a/clang/test/Modules/explicit-build-missing-files.cpp
+++ b/clang/test/Modules/explicit-build-missing-files.cpp
@@ -50,6 +50,7 @@ int y = a2<int>;
 // CHECK: In module 'a':
 // CHECK-NEXT: a.h:1:45: error:
 
+int z = b<int>;
 // MISSING-B: could not find file '{{.*}}b.h'
 // MISSING-B-NOT: please delete the module cache
 #endif

diff  --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index 5b926864f0cc4a2..c7d417324c94f49 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -2261,43 +2261,67 @@ template <typename... Refs> struct enumerator_result<std::size_t, Refs...> {
   mutable range_reference_tuple Storage;
 };
 
-/// Infinite stream of increasing 0-based `size_t` indices.
-struct index_stream {
-  struct iterator : iterator_facade_base<iterator, std::forward_iterator_tag,
-                                         const iterator> {
-    iterator &operator++() {
-      assert(Index != std::numeric_limits<std::size_t>::max() &&
-             "Attempting to increment end iterator");
-      ++Index;
-      return *this;
-    }
+struct index_iterator
+    : llvm::iterator_facade_base<index_iterator,
+                                 std::random_access_iterator_tag, std::size_t> {
+  index_iterator(std::size_t Index) : Index(Index) {}
 
-    // Note: This dereference operator returns a value instead of a reference
-    // and does not strictly conform to the C++17's definition of forward
-    // iterator. However, it satisfies all the forward_iterator requirements
-    // that the `zip_common` depends on and fully conforms to the C++20
-    // definition of forward iterator.
-    std::size_t operator*() const { return Index; }
+  index_iterator &operator+=(std::ptr
diff _t N) {
+    Index += N;
+    return *this;
+  }
 
-    friend bool operator==(const iterator &Lhs, const iterator &Rhs) {
-      return Lhs.Index == Rhs.Index;
-    }
+  index_iterator &operator-=(std::ptr
diff _t N) {
+    Index -= N;
+    return *this;
+  }
 
-    std::size_t Index = 0;
-  };
+  std::ptr
diff _t operator-(const index_iterator &R) const {
+    return Index - R.Index;
+  }
 
-  iterator begin() const { return {}; }
-  iterator end() const {
+  // Note: This dereference operator returns a value instead of a reference
+  // and does not strictly conform to the C++17's definition of forward
+  // iterator. However, it satisfies all the forward_iterator requirements
+  // that the `zip_common` depends on and fully conforms to the C++20
+  // definition of forward iterator.
+  std::size_t operator*() const { return Index; }
+
+  friend bool operator==(const index_iterator &Lhs, const index_iterator &Rhs) {
+    return Lhs.Index == Rhs.Index;
+  }
+
+  friend bool operator<(const index_iterator &Lhs, const index_iterator &Rhs) {
+    return Lhs.Index < Rhs.Index;
+  }
+
+private:
+  std::size_t Index;
+};
+
+/// Infinite stream of increasing 0-based `size_t` indices.
+struct index_stream {
+  index_iterator begin() const { return {0}; }
+  index_iterator end() const {
     // We approximate 'infinity' with the max size_t value, which should be good
     // enough to index over any container.
-    iterator It;
-    It.Index = std::numeric_limits<std::size_t>::max();
-    return It;
+    return index_iterator{std::numeric_limits<std::size_t>::max()};
   }
 };
 
 } // end namespace detail
 
+/// Increasing range of `size_t` indices.
+class index_range {
+  std::size_t Begin;
+  std::size_t End;
+
+public:
+  index_range(std::size_t Begin, std::size_t End) : Begin(Begin), End(End) {}
+  detail::index_iterator begin() const { return {Begin}; }
+  detail::index_iterator end() const { return {End}; }
+};
+
 /// Given two or more input ranges, returns a new range whose values are are
 /// tuples (A, B, C, ...), such that A is the 0-based index of the item in the
 /// sequence, and B, C, ..., are the values from the original input ranges. All


        


More information about the llvm-commits mailing list