[clang] [clang][modules] Move `SLocEntry` search into `ASTReader` (PR #66966)

Jan Svoboda via cfe-commits cfe-commits at lists.llvm.org
Wed Sep 20 17:46:52 PDT 2023


https://github.com/jansvoboda11 created https://github.com/llvm/llvm-project/pull/66966

In `getFileID()` the `SourceManager` ends up doing a binary search over its buffer of `SLocEntries`. For modules, this binary search fully deserializes the entire `SLocEntry` block for visited each entry. This shows up in profiles of the dependency scanner, since that operation includes decompressing buffers associated with some entries.

This patch moves the binary search over loaded entries into `ASTReader`, which now only performs partial deserialization during the binary search, speeding up the scanner by ~3.3%.


>From d26f9487dedf53ac419cbd9dfb8c29214ba0e09f Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda at apple.com>
Date: Wed, 20 Sep 2023 17:30:45 -0700
Subject: [PATCH] [clang][modules] Move `SLocEntry` search into `ASTReader`

In `getFileID()` the `SourceManager` ends up doing a binary search over its buffer of `SLocEntries`. For modules, this binary search fully deserializes the entire `SLocEntry` block for visited each entry. This shows up in profiles of the dependency scanner, since that operation includes decompressing buffers associated with some entries.

This patch moves the binary search over loaded entries into `ASTReader`, which now only performs partial deserialization during the binary search, speeding up the scanner by ~3.3%.
---
 clang/include/clang/Basic/SourceManager.h     |  3 +
 clang/include/clang/Serialization/ASTReader.h |  6 ++
 clang/lib/Basic/SourceManager.cpp             | 70 +------------------
 clang/lib/Serialization/ASTReader.cpp         | 63 +++++++++++++++++
 4 files changed, 75 insertions(+), 67 deletions(-)

diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h
index 2f846502d6f3327..a4c7facddd53d64 100644
--- a/clang/include/clang/Basic/SourceManager.h
+++ b/clang/include/clang/Basic/SourceManager.h
@@ -533,6 +533,9 @@ class ExternalSLocEntrySource {
   /// entry from being loaded.
   virtual bool ReadSLocEntry(int ID) = 0;
 
+  /// Get the index ID for the loaded SourceLocation offset.
+  virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0;
+
   /// Retrieve the module import location and name for the given ID, if
   /// in fact it was loaded from a module (rather than, say, a precompiled
   /// header).
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index dc1eb21c27801fe..e643fcf4c930f09 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -2153,6 +2153,12 @@ class ASTReader
 
   /// Read the source location entry with index ID.
   bool ReadSLocEntry(int ID) override;
+  /// Get the index ID for the loaded SourceLocation offset.
+  int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override;
+  /// Read the offset of the SLocEntry at the given index in the given module
+  /// file.
+  std::optional<SourceLocation::UIntTy> readSLocOffset(ModuleFile *F,
+                                                       unsigned Index);
 
   /// Retrieve the module import location and module name for the
   /// given source manager entry ID.
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index 0521ac7b30339ab..f881afc2e46c5c6 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -864,74 +864,10 @@ FileID SourceManager::getFileIDLocal(SourceLocation::UIntTy SLocOffset) const {
 /// This function knows that the SourceLocation is in a loaded buffer, not a
 /// local one.
 FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const {
-  if (SLocOffset < CurrentLoadedOffset) {
-    assert(0 && "Invalid SLocOffset or bad function choice");
-    return FileID();
-  }
-
-  // Essentially the same as the local case, but the loaded array is sorted
-  // in the other direction (decreasing order).
-  // GreaterIndex is the one where the offset is greater, which is actually a
-  // lower index!
-  unsigned GreaterIndex = 0;
-  unsigned LessIndex = LoadedSLocEntryTable.size();
-  if (LastFileIDLookup.ID < 0) {
-    // Prune the search space.
-    int LastID = LastFileIDLookup.ID;
-    if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset)
-      GreaterIndex =
-          (-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache
-    else
-      LessIndex = -LastID - 2;
-  }
-
-  // First do a linear scan from the last lookup position, if possible.
-  unsigned NumProbes;
+  int ID = ExternalSLocEntries->getSLocEntryID(SLocOffset);
   bool Invalid = false;
-  for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) {
-    // Make sure the entry is loaded!
-    const SrcMgr::SLocEntry &E = getLoadedSLocEntry(GreaterIndex, &Invalid);
-    if (Invalid)
-      return FileID(); // invalid entry.
-    if (E.getOffset() <= SLocOffset) {
-      FileID Res = FileID::get(-int(GreaterIndex) - 2);
-      LastFileIDLookup = Res;
-      NumLinearScans += NumProbes + 1;
-      return Res;
-    }
-  }
-
-  // Linear scan failed. Do the binary search.
-  NumProbes = 0;
-  while (true) {
-    ++NumProbes;
-    unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex;
-    const SrcMgr::SLocEntry &E = getLoadedSLocEntry(MiddleIndex, &Invalid);
-    if (Invalid)
-      return FileID(); // invalid entry.
-
-    if (E.getOffset() > SLocOffset) {
-      if (GreaterIndex == MiddleIndex) {
-        assert(0 && "binary search missed the entry");
-        return FileID();
-      }
-      GreaterIndex = MiddleIndex;
-      continue;
-    }
-
-    if (isOffsetInFileID(FileID::get(-int(MiddleIndex) - 2), SLocOffset)) {
-      FileID Res = FileID::get(-int(MiddleIndex) - 2);
-      LastFileIDLookup = Res;
-      NumBinaryProbes += NumProbes;
-      return Res;
-    }
-
-    if (LessIndex == MiddleIndex) {
-      assert(0 && "binary search missed the entry");
-      return FileID();
-    }
-    LessIndex = MiddleIndex;
-  }
+  (void)getLoadedSLocEntryByID(ID, &Invalid);
+  return Invalid ? FileID() : FileID::get(ID);
 }
 
 SourceLocation SourceManager::
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 0952244d037a77c..fdf89dce41aab4d 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1444,6 +1444,69 @@ llvm::Error ASTReader::ReadSourceManagerBlock(ModuleFile &F) {
   }
 }
 
+std::optional<SourceLocation::UIntTy>
+ASTReader::readSLocOffset(ModuleFile *F, unsigned Index) {
+  BitstreamCursor &Cursor = F->SLocEntryCursor;
+  SavedStreamPosition SavedPosition(Cursor);
+  if (llvm::Error Err = Cursor.JumpToBit(F->SLocEntryOffsetsBase +
+                                         F->SLocEntryOffsets[Index])) {
+    Error(std::move(Err));
+    return std::nullopt;
+  }
+
+  Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
+  if (!MaybeEntry) {
+    Error(MaybeEntry.takeError());
+    return std::nullopt;
+  }
+  llvm::BitstreamEntry Entry = MaybeEntry.get();
+
+  if (Entry.Kind != llvm::BitstreamEntry::Record) {
+    Error("incorrectly-formatted source location entry in AST file");
+    return std::nullopt;
+  }
+
+  RecordData Record;
+  StringRef Blob;
+  Expected<unsigned> MaybeSLOC = Cursor.readRecord(Entry.ID, Record, &Blob);
+  if (!MaybeSLOC) {
+    Error(MaybeSLOC.takeError());
+    return std::nullopt;
+  }
+  switch (MaybeSLOC.get()) {
+  default:
+    Error("incorrectly-formatted source location entry in AST file");
+    return std::nullopt;
+  case SM_SLOC_FILE_ENTRY:
+  case SM_SLOC_BUFFER_ENTRY:
+  case SM_SLOC_EXPANSION_ENTRY:
+    return F->SLocEntryBaseOffset + Record[0];
+  }
+}
+
+int ASTReader::getSLocEntryID(SourceLocation::UIntTy SLocOffset) {
+  auto SLocMapI =
+      GlobalSLocOffsetMap.find(SourceManager::MaxLoadedOffset - SLocOffset - 1);
+  assert(SLocMapI != GlobalSLocOffsetMap.end() &&
+         "Corrupted global sloc offset map");
+  ModuleFile *F = SLocMapI->second;
+
+  std::vector<unsigned> Indices(F->LocalNumSLocEntries);
+  for (unsigned I = 0; I != F->LocalNumSLocEntries; ++I)
+    Indices[I] = I;
+
+  auto It = llvm::upper_bound(Indices, SLocOffset,
+                    [&](SourceLocation::UIntTy Offset, unsigned Index) {
+                      auto EntryOffset = readSLocOffset(F, Index);
+                      assert(EntryOffset && "Corrupted AST file");
+                      return Offset < *EntryOffset;
+                    });
+  // The iterator points to the first entry with start offset greater than the
+  // offset of interest. The previous entry must contain the offset of interest.
+  It = std::prev(It);
+  return F->SLocEntryBaseID + *It;
+}
+
 bool ASTReader::ReadSLocEntry(int ID) {
   if (ID == 0)
     return false;



More information about the cfe-commits mailing list